1 // © 2017 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 // stringoptions.h 5 // created: 2017jun08 Markus W. Scherer 6 7 #ifndef __STRINGOPTIONS_H__ 8 #define __STRINGOPTIONS_H__ 9 10 #include "unicode/utypes.h" 11 12 /** 13 * @addtogroup icu4c ICU4C 14 * @{ 15 * \file 16 * \brief C API: Bit set option bit constants for various string and character processing functions. 17 */ 18 19 /** 20 * Option value for case folding: Use default mappings defined in CaseFolding.txt. 21 * 22 * \xrefitem stable "Stable" "Stable List" ICU 2.0 23 */ 24 #define U_FOLD_CASE_DEFAULT 0 25 26 /** 27 * Option value for case folding: 28 * 29 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I 30 * and dotless i appropriately for Turkic languages (tr, az). 31 * 32 * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that 33 * are to be included for default mappings and 34 * excluded for the Turkic-specific mappings. 35 * 36 * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that 37 * are to be excluded for default mappings and 38 * included for the Turkic-specific mappings. 39 * 40 * \xrefitem stable "Stable" "Stable List" ICU 2.0 41 */ 42 #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 43 44 /** 45 * Titlecase the string as a whole rather than each word. 46 * (Titlecase only the character at index 0, possibly adjusted.) 47 * Option bits value for titlecasing APIs that take an options bit set. 48 * 49 * It is an error to specify multiple titlecasing iterator options together, 50 * including both an options bit and an explicit BreakIterator. 51 * 52 * @see U_TITLECASE_ADJUST_TO_CASED 53 * \xrefitem stable "Stable" "Stable List" ICU 60 54 */ 55 #define U_TITLECASE_WHOLE_STRING 0x20 56 57 /** 58 * Titlecase sentences rather than words. 59 * (Titlecase only the first character of each sentence, possibly adjusted.) 60 * Option bits value for titlecasing APIs that take an options bit set. 61 * 62 * It is an error to specify multiple titlecasing iterator options together, 63 * including both an options bit and an explicit BreakIterator. 64 * 65 * @see U_TITLECASE_ADJUST_TO_CASED 66 * \xrefitem stable "Stable" "Stable List" ICU 60 67 */ 68 #define U_TITLECASE_SENTENCES 0x40 69 70 /** 71 * Do not lowercase non-initial parts of words when titlecasing. 72 * Option bit for titlecasing APIs that take an options bit set. 73 * 74 * By default, titlecasing will titlecase the character at each 75 * (possibly adjusted) BreakIterator index and 76 * lowercase all other characters up to the next iterator index. 77 * With this option, the other characters will not be modified. 78 * 79 * @see U_TITLECASE_ADJUST_TO_CASED 80 * @see UnicodeString::toTitle 81 * @see CaseMap::toTitle 82 * @see ucasemap_setOptions 83 * @see ucasemap_toTitle 84 * @see ucasemap_utf8ToTitle 85 * \xrefitem stable "Stable" "Stable List" ICU 3.8 86 */ 87 #define U_TITLECASE_NO_LOWERCASE 0x100 88 89 /** 90 * Do not adjust the titlecasing BreakIterator indexes; 91 * titlecase exactly the characters at breaks from the iterator. 92 * Option bit for titlecasing APIs that take an options bit set. 93 * 94 * By default, titlecasing will take each break iterator index, 95 * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED), 96 * and titlecase that one. 97 * 98 * Other characters are lowercased. 99 * 100 * It is an error to specify multiple titlecasing adjustment options together. 101 * 102 * @see U_TITLECASE_ADJUST_TO_CASED 103 * @see U_TITLECASE_NO_LOWERCASE 104 * @see UnicodeString::toTitle 105 * @see CaseMap::toTitle 106 * @see ucasemap_setOptions 107 * @see ucasemap_toTitle 108 * @see ucasemap_utf8ToTitle 109 * \xrefitem stable "Stable" "Stable List" ICU 3.8 110 */ 111 #define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 112 113 /** 114 * Adjust each titlecasing BreakIterator index to the next cased character. 115 * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).) 116 * Option bit for titlecasing APIs that take an options bit set. 117 * 118 * This used to be the default index adjustment in ICU. 119 * Since ICU 60, the default index adjustment is to the next character that is 120 * a letter, number, symbol, or private use code point. 121 * (Uncased modifier letters are skipped.) 122 * The difference in behavior is small for word titlecasing, 123 * but the new adjustment is much better for whole-string and sentence titlecasing: 124 * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»". 125 * 126 * It is an error to specify multiple titlecasing adjustment options together. 127 * 128 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT 129 * \xrefitem stable "Stable" "Stable List" ICU 60 130 */ 131 #define U_TITLECASE_ADJUST_TO_CASED 0x400 132 133 /** 134 * Option for string transformation functions to not first reset the Edits object. 135 * Used for example in some case-mapping and normalization functions. 136 * 137 * @see CaseMap 138 * @see Edits 139 * @see Normalizer2 140 * \xrefitem stable "Stable" "Stable List" ICU 60 141 */ 142 #define U_EDITS_NO_RESET 0x2000 143 144 /** 145 * Omit unchanged text when recording how source substrings 146 * relate to changed and unchanged result substrings. 147 * Used for example in some case-mapping and normalization functions. 148 * 149 * @see CaseMap 150 * @see Edits 151 * @see Normalizer2 152 * \xrefitem stable "Stable" "Stable List" ICU 60 153 */ 154 #define U_OMIT_UNCHANGED_TEXT 0x4000 155 156 /** 157 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: 158 * Compare strings in code point order instead of code unit order. 159 * \xrefitem stable "Stable" "Stable List" ICU 2.2 160 */ 161 #define U_COMPARE_CODE_POINT_ORDER 0x8000 162 163 /** 164 * Option bit for unorm_compare: 165 * Perform case-insensitive comparison. 166 * \xrefitem stable "Stable" "Stable List" ICU 2.2 167 */ 168 #define U_COMPARE_IGNORE_CASE 0x10000 169 170 /** 171 * Option bit for unorm_compare: 172 * Both input strings are assumed to fulfill FCD conditions. 173 * \xrefitem stable "Stable" "Stable List" ICU 2.2 174 */ 175 #define UNORM_INPUT_IS_FCD 0x20000 176 177 // Related definitions elsewhere. 178 // Options that are not meaningful in the same functions 179 // can share the same bits. 180 // 181 // Public: 182 // unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20 183 // 184 // Internal: (may change or be removed) 185 // ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff 186 // ucase.h #define _FOLD_CASE_OPTIONS_MASK 7 187 // ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0 188 // ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600 189 // ustr_imp.h #define _STRNCMP_STYLE 0x1000 190 // unormcmp.cpp #define _COMPARE_EQUIV 0x80000 191 192 #endif // __STRINGOPTIONS_H__ 193 194 /** @} */ // addtogroup 195