xref: /aosp_15_r20/external/icu/libicu/ndk_headers/unicode/stringoptions.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 // stringoptions.h
5 // created: 2017jun08 Markus W. Scherer
6 
7 #ifndef __STRINGOPTIONS_H__
8 #define __STRINGOPTIONS_H__
9 
10 #include "unicode/utypes.h"
11 
12 /**
13  * @addtogroup icu4c ICU4C
14  * @{
15  * \file
16  * \brief C API: Bit set option bit constants for various string and character processing functions.
17  */
18 
19 /**
20  * Option value for case folding: Use the default mappings defined in CaseFolding.txt.
21  * The value is 0; see U_FOLD_CASE_EXCLUDE_SPECIAL_I for the Turkic-specific alternative.
22  * \xrefitem stable "Stable" "Stable List" ICU 2.0
23  */
24 #define U_FOLD_CASE_DEFAULT 0
25 
26 /**
27  * Option value for case folding:
28  *
29  * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
30  * and dotless i appropriately for Turkic languages (tr, az).
31  *
32  * Before Unicode 3.2, CaseFolding.txt contained mappings marked with 'I' that
33  * were to be included for default mappings and
34  * excluded for the Turkic-specific mappings.
35  *
36  * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
37  * are to be excluded for default mappings and
38  * included for the Turkic-specific mappings.
39  * @see U_FOLD_CASE_DEFAULT
40  * \xrefitem stable "Stable" "Stable List" ICU 2.0
41  */
42 #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
43 
44 /**
45  * Titlecase the string as a whole rather than each word.
46  * (Titlecase only the character at index 0, possibly adjusted.)
47  * Option bits value for titlecasing APIs that take an options bit set.
48  *
49  * It is an error to specify multiple titlecasing iterator options together,
50  * for example this value with U_TITLECASE_SENTENCES, or an options bit together with an explicit BreakIterator.
51  *
52  * @see U_TITLECASE_ADJUST_TO_CASED
53  * \xrefitem stable "Stable" "Stable List" ICU 60
54  */
55 #define U_TITLECASE_WHOLE_STRING 0x20
56 
57 /**
58  * Titlecase sentences rather than words.
59  * (Titlecase only the first character of each sentence, possibly adjusted.)
60  * Option bits value for titlecasing APIs that take an options bit set.
61  *
62  * It is an error to specify multiple titlecasing iterator options together,
63  * for example this value with U_TITLECASE_WHOLE_STRING, or an options bit together with an explicit BreakIterator.
64  *
65  * @see U_TITLECASE_ADJUST_TO_CASED
66  * \xrefitem stable "Stable" "Stable List" ICU 60
67  */
68 #define U_TITLECASE_SENTENCES 0x40
69 
70 /**
71  * Do not lowercase non-initial parts of words when titlecasing.
72  * Option bit for titlecasing APIs that take an options bit set.
73  *
74  * By default, titlecasing will titlecase the character at each
75  * (possibly adjusted) BreakIterator index and
76  * lowercase all other characters up to the next iterator index.
77  * With this option, those other characters are left unmodified.
78  * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
79  * @see U_TITLECASE_ADJUST_TO_CASED
80  * @see UnicodeString::toTitle
81  * @see CaseMap::toTitle
82  * @see ucasemap_setOptions
83  * @see ucasemap_toTitle
84  * @see ucasemap_utf8ToTitle
85  * \xrefitem stable "Stable" "Stable List" ICU 3.8
86  */
87 #define U_TITLECASE_NO_LOWERCASE 0x100
88 
89 /**
90  * Do not adjust the titlecasing BreakIterator indexes;
91  * titlecase exactly the characters at breaks from the iterator.
92  * Option bit for titlecasing APIs that take an options bit set.
93  *
94  * By default, titlecasing will take each break iterator index,
95  * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
96  * and titlecase that one.
97  *
98  * Other characters are lowercased, unless U_TITLECASE_NO_LOWERCASE is specified.
99  *
100  * It is an error to specify multiple titlecasing adjustment options together, for example this option with U_TITLECASE_ADJUST_TO_CASED.
101  *
102  * @see U_TITLECASE_ADJUST_TO_CASED
103  * @see U_TITLECASE_NO_LOWERCASE
104  * @see UnicodeString::toTitle
105  * @see CaseMap::toTitle
106  * @see ucasemap_setOptions
107  * @see ucasemap_toTitle
108  * @see ucasemap_utf8ToTitle
109  * \xrefitem stable "Stable" "Stable List" ICU 3.8
110  */
111 #define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
112 
113 /**
114  * Adjust each titlecasing BreakIterator index to the next cased character.
115  * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
116  * Option bit for titlecasing APIs that take an options bit set.
117  *
118  * Before ICU 60, this was the default index adjustment in ICU.
119  * Since ICU 60, the default index adjustment is to the next character that is
120  * a letter, number, symbol, or private use code point.
121  * (Uncased modifier letters are skipped.)
122  * The difference in behavior is small for word titlecasing,
123  * but the new adjustment is much better for whole-string and sentence titlecasing:
124  * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
125  *
126  * It is an error to specify multiple titlecasing adjustment options together, for example this option with U_TITLECASE_NO_BREAK_ADJUSTMENT.
127  *
128  * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
129  * \xrefitem stable "Stable" "Stable List" ICU 60
130  */
131 #define U_TITLECASE_ADJUST_TO_CASED 0x400
132 
133 /**
134  * Option bit telling string transformation functions not to reset the Edits object first.
135  * Used for example in some case-mapping and normalization functions.
136  * @see U_OMIT_UNCHANGED_TEXT
137  * @see CaseMap
138  * @see Edits
139  * @see Normalizer2
140  * \xrefitem stable "Stable" "Stable List" ICU 60
141  */
142 #define U_EDITS_NO_RESET 0x2000
143 
144 /**
145  * Option bit: Omit unchanged text when recording how source substrings
146  * relate to changed and unchanged result substrings.
147  * Used for example in some case-mapping and normalization functions.
148  * @see U_EDITS_NO_RESET
149  * @see CaseMap
150  * @see Edits
151  * @see Normalizer2
152  * \xrefitem stable "Stable" "Stable List" ICU 60
153  */
154 #define U_OMIT_UNCHANGED_TEXT 0x4000
155 
156 /**
157  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc.:
158  * Compare strings in code point order instead of code unit order.
159  * \xrefitem stable "Stable" "Stable List" ICU 2.2
160  */
161 #define U_COMPARE_CODE_POINT_ORDER  0x8000
162 
163 /**
164  * Option bit for unorm_compare:
165  * Perform a case-insensitive comparison.
166  * \xrefitem stable "Stable" "Stable List" ICU 2.2
167  */
168 #define U_COMPARE_IGNORE_CASE       0x10000
169 
170 /**
171  * Option bit for unorm_compare:
172  * Both input strings are assumed to already fulfill the FCD conditions.
173  * \xrefitem stable "Stable" "Stable List" ICU 2.2
174  */
175 #define UNORM_INPUT_IS_FCD          0x20000
176 
177 // Related definitions elsewhere.
178 // Options that are not meaningful in the same functions
179 // can share the same bits.
180 //
181 // Public:
182 // unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
183 //
184 // Internal: (may change or be removed)
185 // ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
186 // ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
187 // ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
188 // ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
189 // ustr_imp.h #define _STRNCMP_STYLE 0x1000
190 // unormcmp.cpp #define _COMPARE_EQUIV 0x80000
191 
192 #endif  // __STRINGOPTIONS_H__
193 
194 /** @} */ // addtogroup
195