1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *   Copyright (C) 1997-2011,2014-2015 International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 *   Date        Name        Description
9 *   06/21/00    aliu        Creation.
10 *******************************************************************************
11 */
12 
13 #ifndef UTRANS_H
14 #define UTRANS_H
15 
16 #include "unicode/utypes.h"
17 
18 #if !UCONFIG_NO_TRANSLITERATION
19 
20 #include "unicode/urep.h"
21 #include "unicode/parseerr.h"
22 #include "unicode/uenum.h"
23 
24 #if U_SHOW_CPLUSPLUS_API
25 #include "unicode/localpointer.h"
26 #endif   // U_SHOW_CPLUSPLUS_API
27 
28 /********************************************************************
29  * General Notes
30  ********************************************************************
31  */
32 /**
33  * @addtogroup icu4c ICU4C
34  * @{
35  * \file
36  * \brief C API: Transliterator
37  *
38  * <h2> Transliteration </h2>
39  * The data structures and functions described in this header provide
40  * transliteration services.  Transliteration services are implemented
41  * as C++ classes.  The comments and documentation in this header
42  * assume the reader is familiar with the C++ headers translit.h and
43  * associated documentation.
44  *
45  * A significant but incomplete subset of the C++ transliteration
46  * services are available to C code through this header.  In order to
47  * access more complex transliteration services, refer to the C++
48  * headers and documentation.
49  *
50  * There are two sets of functions for working with transliterator IDs:
51  *
52  * An old, deprecated set uses char * IDs, which work only for the plain
53  * identifiers that those APIs were designed for,
54  * for example "Cyrillic-Latin".
55  * It does not work when the ID contains filters ("[:Script=Cyrl:]")
56  * or even a complete set of rules, because then the ID string contains more
57  * than just "invariant" characters (see utypes.h).
58  *
59  * A new set of functions replaces the old ones and uses UChar * IDs,
60  * paralleling the UnicodeString IDs in the C++ API. (New in ICU 2.8.)
61  */
62 
63 /********************************************************************
64  * Data Structures
65  ********************************************************************/
66 
67 /**
68  * An opaque transliterator for use in C.  Open with utrans_openU(),
69  * utrans_openInverse() or utrans_clone(), and close with utrans_close()
70  * when done.  Equivalent to the C++ class Transliterator and its subclasses.
71  * @see Transliterator
72  * \xrefitem stable "Stable" "Stable List" ICU 2.0
73  */
74 typedef void* UTransliterator;
75 
76 /**
77  * Direction constant indicating the direction in a transliterator,
78  * e.g., the forward or reverse rules of a RuleBasedTransliterator.
79  * Passed as the dir argument of utrans_openU().  An "A-B" transliterator
80  * transliterates A to B when operating in the forward direction, and
81  * B to A when operating in the reverse direction.
82  * \xrefitem stable "Stable" "Stable List" ICU 2.0
83  */
84 typedef enum UTransDirection {
85 
86     /**
87      * UTRANS_FORWARD means from &lt;source&gt; to &lt;target&gt; for a
88      * transliterator with ID &lt;source&gt;-&lt;target&gt;.  For a transliterator
89      * opened using a rule, it means forward direction rules, e.g.,
90      * "A > B".  First enumerator, so its value is 0.
91      */
92     UTRANS_FORWARD,
93 
94     /**
95      * UTRANS_REVERSE means from &lt;target&gt; to &lt;source&gt; for a
96      * transliterator with ID &lt;source&gt;-&lt;target&gt;.  For a transliterator
97      * opened using a rule, it means reverse direction rules, e.g.,
98      * "A < B".  Second enumerator, so its value is 1.
99      */
100     UTRANS_REVERSE
101 
102 } UTransDirection;
103 
104 /**
105  * Position structure for utrans_transIncremental() and
106  * utrans_transIncrementalUChars() incremental transliteration.  It
107  * defines two substrings of the text being transliterated.  The first
108  * region, [contextStart, contextLimit), defines what characters the
109  * transliterator will read as context.  The second region, [start, limit),
110  * defines what characters will actually be transliterated.  The second
111  * region should be a subset of the first.
112  *
113  * <p>After a transliteration operation, some of the indices in this
114  * structure will be modified.  See the field descriptions for
115  * details.
116  *
117  * <p><code>contextStart <= start <= limit <= contextLimit</code>
118  *
119  * <p>Note: All index values in this structure must be at code point
120  * boundaries.  That is, none of them may occur between two code units
121  * of a surrogate pair.  If any index does split a surrogate pair,
122  * results are unspecified.
123  *
124  * \xrefitem stable "Stable" "Stable List" ICU 2.0
125  */
126 typedef struct UTransPosition {
127 
128     /**
129      * Beginning index, inclusive, of the context to be considered for
130      * a transliteration operation.  The transliterator will ignore
131      * anything before this index.  INPUT/OUTPUT parameter: This parameter
132      * is updated by a transliteration operation to reflect the maximum
133      * amount of antecontext needed by a transliterator.
134      * \xrefitem stable "Stable" "Stable List" ICU 2.4
135      */
136     int32_t contextStart;
137 
138     /**
139      * Ending index, exclusive, of the context to be considered for a
140      * transliteration operation.  The transliterator will ignore
141      * anything at or after this index.  INPUT/OUTPUT parameter: This
142      * parameter is updated to reflect changes in the length of the
143      * text, but points to the same logical position in the text.
144      * \xrefitem stable "Stable" "Stable List" ICU 2.4
145      */
146     int32_t contextLimit;
147 
148     /**
149      * Beginning index, inclusive, of the text to be transliterated.
150      * INPUT/OUTPUT parameter: This parameter is advanced past
151      * characters that have already been transliterated by a
152      * transliteration operation.
153      * \xrefitem stable "Stable" "Stable List" ICU 2.4
154      */
155     int32_t start;
156 
157     /**
158      * Ending index, exclusive, of the text to be transliterated.
159      * INPUT/OUTPUT parameter: This parameter is updated to reflect
160      * changes in the length of the text, but points to the same
161      * logical position in the text.
162      * \xrefitem stable "Stable" "Stable List" ICU 2.4
163      */
164     int32_t limit;
165 
166 } UTransPosition;
167 
168 /********************************************************************
169  * General API
170  ********************************************************************/
171 
172 /**
173  * Open either a custom transliterator, given a custom rules string
174  * (rules is non-NULL),
175  * or a system transliterator, given its ID (rules is NULL).
176  * Any non-NULL result from this function should later be closed with
177  * utrans_close().
178  *
179  * @param id a valid transliterator ID
180  * @param idLength the length of the ID string, or -1 if NUL-terminated
181  * @param dir the desired direction (UTRANS_FORWARD or UTRANS_REVERSE)
182  * @param rules the transliterator rules.  See the C++ header rbt.h for
183  *              rules syntax. If NULL then a system transliterator matching
184  *              the ID is returned.
185  * @param rulesLength the length of the rules, or -1 if the rules
186  *                    are NUL-terminated.
187  * @param parseError a pointer to a UParseError struct to receive the details
188  *                   of any parsing errors. This parameter may be NULL if no
189  *                   parsing error details are desired.
190  * @param pErrorCode a pointer to the UErrorCode
191  * @return a transliterator pointer that may be passed to other
192  *         utrans_xxx() functions, or NULL if the open call fails.
193  * \xrefitem stable "Stable" "Stable List" ICU 2.8
194  */
195 U_CAPI UTransliterator* U_EXPORT2
196 utrans_openU(const UChar *id,
197              int32_t idLength,
198              UTransDirection dir,
199              const UChar *rules,
200              int32_t rulesLength,
201              UParseError *parseError,
202              UErrorCode *pErrorCode) __INTRODUCED_IN(__ANDROID_API_T__);
203 
204 
205 
206 /**
207  * Open an inverse of an existing transliterator.  For this to work,
208  * the inverse must be registered with the system.  For example, if
209  * the Transliterator "A-B" is opened, and then its inverse is opened,
210  * the result is the Transliterator "B-A", if such a transliterator is
211  * registered with the system.  Otherwise the result is NULL and a
212  * failing UErrorCode is set.  Any non-NULL result from this function
213  * should later be closed with utrans_close().
214  *
215  * @param trans the transliterator to open the inverse of.
216  * @param status a pointer to the UErrorCode; receives a failing code if
217  * no inverse is registered.
218  * @return a newly-opened inverse of trans, or NULL if the open call fails.
219  * \xrefitem stable "Stable" "Stable List" ICU 2.0
220  */
221 U_CAPI UTransliterator* U_EXPORT2
222 utrans_openInverse(const UTransliterator* trans,
223                    UErrorCode* status) __INTRODUCED_IN(__ANDROID_API_T__);
224 
225 
226 
227 /**
228  * Create a copy of a transliterator.  Any non-NULL result should later
229  * be closed with utrans_close(), separately from the original trans.
230  *
231  * @param trans the transliterator to be copied.
232  * @param status a pointer to the UErrorCode
233  * @return a transliterator pointer that may be passed to other
234  * utrans_xxx() functions, or NULL if the clone call fails.
235  * \xrefitem stable "Stable" "Stable List" ICU 2.0
236  */
237 U_CAPI UTransliterator* U_EXPORT2
238 utrans_clone(const UTransliterator* trans,
239              UErrorCode* status) __INTRODUCED_IN(__ANDROID_API_T__);
240 
241 
242 
243 /**
244  * Close a transliterator.  Any non-NULL pointer returned by utrans_openU(),
245  * utrans_openInverse() or utrans_clone() should eventually be closed.
246  * @param trans the transliterator to be closed.
247  * \xrefitem stable "Stable" "Stable List" ICU 2.0
248  */
249 U_CAPI void U_EXPORT2
250 utrans_close(UTransliterator* trans) __INTRODUCED_IN(__ANDROID_API_T__);
251 
252 
253 
254 #if U_SHOW_CPLUSPLUS_API
255 
256 U_NAMESPACE_BEGIN
257 
258 /**
259  * \class LocalUTransliteratorPointer
260  * "Smart pointer" class, closes a UTransliterator via utrans_close().
261  * For most methods see the LocalPointerBase base class.
262  *
263  * @see LocalPointerBase
264  * @see LocalPointer
265  * \xrefitem stable "Stable" "Stable List" ICU 4.4
266  */
267 U_DEFINE_LOCAL_OPEN_POINTER(LocalUTransliteratorPointer, UTransliterator, utrans_close);
268 
269 U_NAMESPACE_END
270 
271 #endif   // U_SHOW_CPLUSPLUS_API
272 
273 
274 
275 
276 
277 
278 
279 /**
280  * Set the filter used by a transliterator.  A filter can be used to
281  * make the transliterator pass certain characters through untouched.
282  * The filter is expressed using a UnicodeSet pattern.  If the
283  * filterPattern is NULL or the empty string, then the transliterator
284  * will be reset to use no filter.
285  *
286  * @param trans the transliterator
287  * @param filterPattern a pattern string, in the form accepted by
288  * UnicodeSet, specifying which characters to apply the
289  * transliteration to.  May be NULL or the empty string to indicate no
290  * filter.
291  * @param filterPatternLen the length of filterPattern, or -1 if
292  * filterPattern is NUL-terminated
293  * @param status a pointer to the UErrorCode
294  * @see UnicodeSet
295  * \xrefitem stable "Stable" "Stable List" ICU 2.0
296  */
297 U_CAPI void U_EXPORT2
298 utrans_setFilter(UTransliterator* trans,
299                  const UChar* filterPattern,
300                  int32_t filterPatternLen,
301                  UErrorCode* status) __INTRODUCED_IN(__ANDROID_API_T__);
302 
303 
304 
305 
306 
307 /**
308  * Return a UEnumeration for the available transliterators.  The caller
309  * is responsible for closing the result with uenum_close().
310  * @param pErrorCode Pointer to the UErrorCode in/out parameter.
311  * @return UEnumeration for the available transliterators.
312  *         Close with uenum_close().
313  *
314  * \xrefitem stable "Stable" "Stable List" ICU 2.8
315  */
316 U_CAPI UEnumeration * U_EXPORT2
317 utrans_openIDs(UErrorCode *pErrorCode) __INTRODUCED_IN(__ANDROID_API_T__);
318 
319 
320 
321 /********************************************************************
322  * Transliteration API
323  ********************************************************************/
324 
325 /**
326  * Transliterate a segment of a UReplaceable string.  The string is
327  * passed in as a UReplaceable pointer rep and a UReplaceableCallbacks
328  * function pointer struct repFunc.  Functions in the repFunc struct
329  * will be called in order to modify the rep string.
330  *
331  * @param trans the transliterator
332  * @param rep a pointer to the string.  This will be passed to the
333  * repFunc functions.
334  * @param repFunc a set of function pointers that will be used to
335  * modify the string pointed to by rep.
336  * @param start the beginning index, inclusive; <code>0 <= start <=
337  * limit</code>.
338  * @param limit pointer to the ending index, exclusive; <code>start <=
339  * limit <= repFunc->length(rep)</code>.  Upon return, *limit will
340  * contain the new limit index.  The text previously occupying
341  * <code>[start, limit)</code> has been transliterated, possibly to a
342  * string of a different length, at <code>[start,
343  * </code><em>new-limit</em><code>)</code>, where <em>new-limit</em>
344  * is the value written back to *limit (the function itself returns void).
345  * @param status a pointer to the UErrorCode
346  * \xrefitem stable "Stable" "Stable List" ICU 2.0
347  */
348 U_CAPI void U_EXPORT2
349 utrans_trans(const UTransliterator* trans,
350              UReplaceable* rep,
351              const UReplaceableCallbacks* repFunc,
352              int32_t start,
353              int32_t* limit,
354              UErrorCode* status) __INTRODUCED_IN(__ANDROID_API_T__);
355 
356 
357 
358 /**
359  * Transliterate the portion of the UReplaceable text buffer that can
360  * be transliterated unambiguously.  This method is typically called
361  * after new text has been inserted, e.g. as a result of a keyboard
362  * event.  The transliterator will try to transliterate characters of
363  * <code>rep</code> between <code>pos->start</code> and
364  * <code>pos->limit</code>.  Characters before
365  * <code>pos->start</code> will not be changed.
366  *
367  * <p>Upon return, values in <code>pos</code> will be updated.
368  * <code>pos->contextStart</code> will be advanced to the first
369  * character that future calls to this method will read.
370  * <code>pos->start</code> and <code>pos->limit</code> will
371  * be adjusted to delimit the range of text that future calls to
372  * this method may change.
373  *
374  * <p>Typical usage of this method begins with an initial call
375  * with <code>pos->start</code> and <code>pos->limit</code>
376  * set to indicate the portion of <code>rep</code> to be
377  * transliterated, and <code>pos->contextStart == pos->start</code>.
378  * Thereafter, <code>pos</code> can be used without
379  * modification in future calls, provided that all changes to
380  * <code>rep</code> are made via this method.
381  *
382  * <p>This method assumes that future calls may be made that will
383  * insert new text into the buffer.  As a result, it only performs
384  * unambiguous transliterations.  After the last call to this method,
385  * there may be untransliterated text that is waiting for more input
386  * to resolve an ambiguity.  In order to perform these pending
387  * transliterations, clients should call utrans_trans() with a start
388  * of pos->start and a limit of pos->limit after the last call to this
389  * method has been made.
390  *
391  * @param trans the transliterator
392  * @param rep a pointer to the string.  This will be passed to the
393  * repFunc functions.
394  * @param repFunc a set of function pointers that will be used to
395  * modify the string pointed to by rep.
396  * @param pos a UTransPosition holding the context and transliteration
397  * ranges; updated on return (see the UTransPosition field descriptions)
398  * @param status a pointer to the UErrorCode
399  * \xrefitem stable "Stable" "Stable List" ICU 2.0
400  */
401 U_CAPI void U_EXPORT2
402 utrans_transIncremental(const UTransliterator* trans,
403                         UReplaceable* rep,
404                         const UReplaceableCallbacks* repFunc,
405                         UTransPosition* pos,
406                         UErrorCode* status) __INTRODUCED_IN(__ANDROID_API_T__);
407 
408 
409 
410 /**
411  * Transliterate a segment of a UChar* string.  The string is passed
412  * in as a UChar* buffer.  The string is modified in place.  If the
413  * result is longer than textCapacity, it is truncated.  The actual
414  * length of the result is returned in *textLength, if textLength is
415  * non-NULL. *textLength may be greater than textCapacity, but only
416  * textCapacity UChars will be written to *text, including the zero
417  * terminator.
418  *
419  * @param trans the transliterator
420  * @param text a pointer to a buffer containing the text to be
421  * transliterated on input and the result text on output.
422  * @param textLength a pointer to the length of the string in text.
423  * If the length is -1 then the string is assumed to be
424  * zero-terminated.  Upon return, the new length is stored in
425  * *textLength.  If textLength is NULL then the string is assumed to
426  * be zero-terminated.
427  * @param textCapacity the capacity of the text buffer, in UChars
428  * @param start the beginning index, inclusive; <code>0 <= start <=
429  * limit</code>.
430  * @param limit pointer to the ending index, exclusive; <code>start <=
431  * limit</code> <= the length of the string in text.  Upon return, *limit will
432  * contain the new limit index.  The text previously occupying
433  * <code>[start, limit)</code> has been transliterated, possibly to a
434  * string of a different length, at <code>[start,
435  * </code><em>new-limit</em><code>)</code>, where <em>new-limit</em>
436  * is the value written back to *limit (the function itself returns void).
437  * @param status a pointer to the UErrorCode
438  * \xrefitem stable "Stable" "Stable List" ICU 2.0
439  */
440 U_CAPI void U_EXPORT2
441 utrans_transUChars(const UTransliterator* trans,
442                    UChar* text,
443                    int32_t* textLength,
444                    int32_t textCapacity,
445                    int32_t start,
446                    int32_t* limit,
447                    UErrorCode* status) __INTRODUCED_IN(__ANDROID_API_T__);
448 
449 
450 
451 /**
452  * Transliterate the portion of the UChar* text buffer that can be
453  * transliterated unambiguously.  See utrans_transIncremental().  The
454  * string is passed in as a UChar* buffer.  The string is modified in
455  * place.  If the result is longer than textCapacity, it is truncated.
456  * The actual length of the result is returned in *textLength, if
457  * textLength is non-NULL. *textLength may be greater than
458  * textCapacity, but only textCapacity UChars will be written to
459  * *text, including the zero terminator.  See utrans_transIncremental()
460  * for usage details.
461  *
462  * @param trans the transliterator
463  * @param text a pointer to a buffer containing the text to be
464  * transliterated on input and the result text on output.
465  * @param textLength a pointer to the length of the string in text.
466  * If the length is -1 then the string is assumed to be
467  * zero-terminated.  Upon return, the new length is stored in
468  * *textLength.  If textLength is NULL then the string is assumed to
469  * be zero-terminated.
470  * @param textCapacity the capacity of the text buffer, in UChars
471  * @param pos a UTransPosition holding the context and transliteration
472  * ranges; updated on return (see the UTransPosition field descriptions)
473  * @param status a pointer to the UErrorCode
474  * @see utrans_transIncremental
475  * \xrefitem stable "Stable" "Stable List" ICU 2.0
476  */
477 U_CAPI void U_EXPORT2
478 utrans_transIncrementalUChars(const UTransliterator* trans,
479                               UChar* text,
480                               int32_t* textLength,
481                               int32_t textCapacity,
482                               UTransPosition* pos,
483                               UErrorCode* status) __INTRODUCED_IN(__ANDROID_API_T__);
484 
485 
486 
487 /**
488  * Create a rule string that can be passed as the rules argument of
489  * utrans_openU() to recreate this transliterator.
490  *
491  * @param trans     The transliterator
492  * @param escapeUnprintable if true then convert unprintable characters to their
493  *                  hex escape representations, \\uxxxx or \\Uxxxxxxxx.
494  *                  Unprintable characters are those other than
495  *                  U+000A, U+0020..U+007E.
496  * @param result    A pointer to a buffer to receive the rules.
497  * @param resultLength The maximum size of result.
498  * @param status    A pointer to the UErrorCode. In case of error status, the
499  *                  contents of result are undefined.
500  * @return int32_t   The length of the rule string (may be greater than resultLength,
501  *                  in which case an error is returned).
502  * \xrefitem stable "Stable" "Stable List" ICU 53
503  */
504 U_CAPI int32_t U_EXPORT2
505 utrans_toRules(     const UTransliterator* trans,
506                     UBool escapeUnprintable,
507                     UChar* result, int32_t resultLength,
508                     UErrorCode* status) __INTRODUCED_IN(__ANDROID_API_T__);
509 
510 
511 
512 
513 
514 /* deprecated API ----------------------------------------------------------- */
515 
516 #ifndef U_HIDE_DEPRECATED_API
517 
518 /* See the file-level notes at the top of this header for why the char*-ID functions are deprecated. */
519 
520 
521 
522 
523 
524 
525 
526 
527 
528 #endif  /* U_HIDE_DEPRECATED_API */
529 
530 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
531 
532 #endif
533 
534 /** @} */ // addtogroup
535