// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 1997-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
* File UCHAR.H
*
* Modification History:
*
*   Date        Name        Description
*   04/02/97    aliu        Creation.
*   03/29/99    helena      Updated for C APIs.
*   4/15/99     Madhu       Updated for C Implementation and Javadoc
*   5/20/99     Madhu       Added the function u_getVersion()
*   8/19/1999   srl         Upgraded scripts to Unicode 3.0
*   8/27/1999   schererm    UCharDirection constants: U_...
*   11/11/1999  weiv        added u_isalnum(), cleaned comments
*   01/11/2000  helena      Renamed u_getVersion to u_getUnicodeVersion().
******************************************************************************
*/

#ifndef UCHAR_H
#define UCHAR_H

#include <stdbool.h>
#include "unicode/utypes.h"
#include "unicode/stringoptions.h"
#include "unicode/ucpmap.h"

/*
 * Declare USet only once: the guard macro USET_DEFINED is set right below,
 * so a second evaluation of this condition skips the typedef.
 * The declaration is also skipped when U_IN_DOXYGEN is defined.
 */
#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)

#define USET_DEFINED

/**
 * USet is the C API type corresponding to C++ class UnicodeSet.
 * It is forward-declared here to avoid including unicode/uset.h file if related
 * APIs are not used.
 *
 * @see ucnv_getUnicodeSet
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
typedef struct USet USet;

#endif /* !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN) */


/* NOTE(review): U_CDECL_BEGIN is not defined in this file; presumably it comes
   from unicode/utypes.h and opens the C-linkage region for C++ callers -- confirm. */
U_CDECL_BEGIN

/*==========================================================================*/
/* Unicode version number                                                   */
/*==========================================================================*/
/**
 * Unicode version number, default for the current ICU version.
 * The actual Unicode Character Database (UCD) data is stored in uprops.dat
 * and may be generated from UCD files from a different Unicode version.
 * Call u_getUnicodeVersion to get the actual Unicode version of the data.
 *
 * The macro expands to a string literal, not to a numeric value.
 *
 * @see u_getUnicodeVersion
 * \xrefitem stable "Stable" "Stable List" ICU 2.0
 */
#define U_UNICODE_VERSION "15.1"

/**
 * @addtogroup icu4c ICU4C
 * @{
 * \file
 * \brief C API: Unicode Properties
 *
 * This C API provides low-level access to the Unicode Character Database.
 * In addition to raw property values, some convenience functions calculate
 * derived properties, for example for Java-style programming.
 *
 * Unicode assigns each code point (not just assigned character) values for
 * many properties.
 * Most of them are simple boolean flags, or constants from a small enumerated list.
 * For some properties, values are strings or other relatively more complex types.
 *
 * For more information see
 * "About the Unicode Character Database" (http://www.unicode.org/ucd/)
 * and the ICU User Guide chapter on Properties (https://unicode-org.github.io/icu/userguide/strings/properties).
 *
 * Many properties are accessible via generic functions that take a UProperty selector.
 * - u_hasBinaryProperty() returns a binary value (true/false) per property and code point.
 * - u_getIntPropertyValue() returns an integer value per property and code point.
 *   For each supported enumerated or catalog property, there is
 *   an enum type for all of the property's values, and
 *   u_getIntPropertyValue() returns the numeric values of those constants.
 * - u_getBinaryPropertySet() returns a set for each ICU-supported binary property with
 *   all code points for which the property is true.
 * - u_getIntPropertyMap() returns a map for each
 *   ICU-supported enumerated/catalog/int-valued property which
 *   maps all Unicode code points to their values for that property.
 *
 * Many functions are designed to match java.lang.Character functions.
98 * See the individual function documentation, 99 * and see the JDK 1.4 java.lang.Character documentation 100 * at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html 101 * 102 * There are also functions that provide easy migration from C/POSIX functions 103 * like isblank(). Their use is generally discouraged because the C/POSIX 104 * standards do not define their semantics beyond the ASCII range, which means 105 * that different implementations exhibit very different behavior. 106 * Instead, Unicode properties should be used directly. 107 * 108 * There are also only a few, broad C/POSIX character classes, and they tend 109 * to be used for conflicting purposes. For example, the "isalpha()" class 110 * is sometimes used to determine word boundaries, while a more sophisticated 111 * approach would at least distinguish initial letters from continuation 112 * characters (the latter including combining marks). 113 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) 114 * Another example: There is no "istitle()" class for titlecase characters. 115 * 116 * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. 117 * ICU implements them according to the Standard Recommendations in 118 * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions 119 * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 
120 * 121 * API access for C/POSIX character classes is as follows: 122 * - alpha: u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC) 123 * - lower: u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE) 124 * - upper: u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE) 125 * - punct: u_ispunct(c) 126 * - digit: u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER 127 * - xdigit: u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT) 128 * - alnum: u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM) 129 * - space: u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE) 130 * - blank: u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK) 131 * - cntrl: u_charType(c)==U_CONTROL_CHAR 132 * - graph: u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH) 133 * - print: u_hasBinaryProperty(c, UCHAR_POSIX_PRINT) 134 * 135 * Note: Some of the u_isxyz() functions in uchar.h predate, and do not match, 136 * the Standard Recommendations in UTS #18. Instead, they match Java 137 * functions according to their API documentation. 138 * 139 * \htmlonly 140 * The C/POSIX character classes are also available in UnicodeSet patterns, 141 * using patterns like [:graph:] or \p{graph}. 142 * \endhtmlonly 143 * 144 * Note: There are several ICU whitespace functions. 145 * Comparison: 146 * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; 147 * most of general categories "Z" (separators) + most whitespace ISO controls 148 * (including no-break spaces, but excluding IS1..IS4) 149 * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces 150 * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces) 151 * - u_isspace: Z + whitespace ISO controls (including no-break spaces) 152 * - u_isblank: "horizontal spaces" = TAB + Zs 153 */ 154 155 /** 156 * Constants. 157 */ 158 159 /** The lowest Unicode code point value. Code points are non-negative. 
\xrefitem stable "Stable" "Stable List" ICU 2.0 */
#define UCHAR_MIN_VALUE 0

/**
 * The highest Unicode code point value (scalar value) according to
 * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up).
 * For a single character, UChar32 is a simple type that can hold any code point value.
 *
 * @see UChar32
 * \xrefitem stable "Stable" "Stable List" ICU 2.0
 */
#define UCHAR_MAX_VALUE 0x10ffff

/**
 * Get a single-bit bit set (a flag) from a bit number 0..31.
 *
 * The shifted operand is a uint32_t, so x must stay within 0..31:
 * shifting by a negative amount or by 32 or more is undefined behavior in C.
 * The argument appears exactly once in the expansion.
 *
 * \xrefitem stable "Stable" "Stable List" ICU 2.1
 */
#define U_MASK(x) ((uint32_t)1<<(x))

/**
 * Selection constants for Unicode properties.
 * These constants are used in functions like u_hasBinaryProperty to select
 * one of the Unicode properties.
 *
 * The properties APIs are intended to reflect Unicode properties as defined
 * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
 *
 * For details about the properties see
 * UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/).
 *
 * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,
 * then properties marked with "new in Unicode 3.2" are not or not fully available.
 * Check u_getUnicodeVersion to be sure.
 *
 * @see u_hasBinaryProperty
 * @see u_getIntPropertyValue
 * @see u_getUnicodeVersion
 * \xrefitem stable "Stable" "Stable List" ICU 2.1
 */
typedef enum UProperty {
    /*
     * Note: UProperty constants are parsed by preparseucd.py.
     * It matches lines like
     *     UCHAR_<Unicode property name>=<integer>,
     */

    /*  Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
    debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
    rather than UCHAR_BINARY_START.  Likewise for other *_START
    identifiers. */

    /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha.
211 Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 212 UCHAR_ALPHABETIC=0, 213 /** First constant for binary Unicode properties. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 214 UCHAR_BINARY_START=UCHAR_ALPHABETIC, 215 /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 216 UCHAR_ASCII_HEX_DIGIT=1, 217 /** Binary property Bidi_Control. 218 Format controls which have specific functions 219 in the Bidi Algorithm. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 220 UCHAR_BIDI_CONTROL=2, 221 /** Binary property Bidi_Mirrored. 222 Characters that may change display in RTL text. 223 Same as u_isMirrored. 224 See Bidi Algorithm, UTR 9. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 225 UCHAR_BIDI_MIRRORED=3, 226 /** Binary property Dash. Variations of dashes. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 227 UCHAR_DASH=4, 228 /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2). 229 Ignorable in most processing. 230 <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 231 UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5, 232 /** Binary property Deprecated (new in Unicode 3.2). 233 The usage of deprecated characters is strongly discouraged. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 234 UCHAR_DEPRECATED=6, 235 /** Binary property Diacritic. Characters that linguistically modify 236 the meaning of another character to which they apply. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 237 UCHAR_DIACRITIC=7, 238 /** Binary property Extender. 239 Extend the value or shape of a preceding alphabetic character, 240 e.g., length and iteration marks. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 241 UCHAR_EXTENDER=8, 242 /** Binary property Full_Composition_Exclusion. 243 CompositionExclusions.txt+Singleton Decompositions+ 244 Non-Starter Decompositions. 
\xrefitem stable "Stable" "Stable List" ICU 2.1 */ 245 UCHAR_FULL_COMPOSITION_EXCLUSION=9, 246 /** Binary property Grapheme_Base (new in Unicode 3.2). 247 For programmatic determination of grapheme cluster boundaries. 248 [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 249 UCHAR_GRAPHEME_BASE=10, 250 /** Binary property Grapheme_Extend (new in Unicode 3.2). 251 For programmatic determination of grapheme cluster boundaries. 252 Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 253 UCHAR_GRAPHEME_EXTEND=11, 254 /** Binary property Grapheme_Link (new in Unicode 3.2). 255 For programmatic determination of grapheme cluster boundaries. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 256 UCHAR_GRAPHEME_LINK=12, 257 /** Binary property Hex_Digit. 258 Characters commonly used for hexadecimal numbers. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 259 UCHAR_HEX_DIGIT=13, 260 /** Binary property Hyphen. Dashes used to mark connections 261 between pieces of words, plus the Katakana middle dot. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 262 UCHAR_HYPHEN=14, 263 /** Binary property ID_Continue. 264 Characters that can continue an identifier. 265 DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out." 266 ID_Start+Mn+Mc+Nd+Pc \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 267 UCHAR_ID_CONTINUE=15, 268 /** Binary property ID_Start. 269 Characters that can start an identifier. 270 Lu+Ll+Lt+Lm+Lo+Nl \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 271 UCHAR_ID_START=16, 272 /** Binary property Ideographic. 273 CJKV ideographs. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 274 UCHAR_IDEOGRAPHIC=17, 275 /** Binary property IDS_Binary_Operator (new in Unicode 3.2). 276 For programmatic determination of 277 Ideographic Description Sequences. 
\xrefitem stable "Stable" "Stable List" ICU 2.1 */ 278 UCHAR_IDS_BINARY_OPERATOR=18, 279 /** Binary property IDS_Trinary_Operator (new in Unicode 3.2). 280 For programmatic determination of 281 Ideographic Description Sequences. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 282 UCHAR_IDS_TRINARY_OPERATOR=19, 283 /** Binary property Join_Control. 284 Format controls for cursive joining and ligation. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 285 UCHAR_JOIN_CONTROL=20, 286 /** Binary property Logical_Order_Exception (new in Unicode 3.2). 287 Characters that do not use logical order and 288 require special handling in most processing. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 289 UCHAR_LOGICAL_ORDER_EXCEPTION=21, 290 /** Binary property Lowercase. Same as u_isULowercase, different from u_islower. 291 Ll+Other_Lowercase \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 292 UCHAR_LOWERCASE=22, 293 /** Binary property Math. Sm+Other_Math \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 294 UCHAR_MATH=23, 295 /** Binary property Noncharacter_Code_Point. 296 Code points that are explicitly defined as illegal 297 for the encoding of characters. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 298 UCHAR_NONCHARACTER_CODE_POINT=24, 299 /** Binary property Quotation_Mark. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 300 UCHAR_QUOTATION_MARK=25, 301 /** Binary property Radical (new in Unicode 3.2). 302 For programmatic determination of 303 Ideographic Description Sequences. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 304 UCHAR_RADICAL=26, 305 /** Binary property Soft_Dotted (new in Unicode 3.2). 306 Characters with a "soft dot", like i or j. 307 An accent placed on these characters causes 308 the dot to disappear. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 309 UCHAR_SOFT_DOTTED=27, 310 /** Binary property Terminal_Punctuation. 311 Punctuation characters that generally mark 312 the end of textual units. 
\xrefitem stable "Stable" "Stable List" ICU 2.1 */ 313 UCHAR_TERMINAL_PUNCTUATION=28, 314 /** Binary property Unified_Ideograph (new in Unicode 3.2). 315 For programmatic determination of 316 Ideographic Description Sequences. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 317 UCHAR_UNIFIED_IDEOGRAPH=29, 318 /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper. 319 Lu+Other_Uppercase \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 320 UCHAR_UPPERCASE=30, 321 /** Binary property White_Space. 322 Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace. 323 Space characters+TAB+CR+LF-ZWSP-ZWNBSP \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 324 UCHAR_WHITE_SPACE=31, 325 /** Binary property XID_Continue. 326 ID_Continue modified to allow closure under 327 normalization forms NFKC and NFKD. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 328 UCHAR_XID_CONTINUE=32, 329 /** Binary property XID_Start. ID_Start modified to allow 330 closure under normalization forms NFKC and NFKD. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 331 UCHAR_XID_START=33, 332 /** Binary property Case_Sensitive. Either the source of a case 333 mapping or _in_ the target of a case mapping. Not the same as 334 the general category Cased_Letter. \xrefitem stable "Stable" "Stable List" ICU 2.6 */ 335 UCHAR_CASE_SENSITIVE=34, 336 /** Binary property STerm (new in Unicode 4.0.1). 337 Sentence Terminal. Used in UAX #29: Text Boundaries 338 (http://www.unicode.org/reports/tr29/) 339 \xrefitem stable "Stable" "Stable List" ICU 3.0 */ 340 UCHAR_S_TERM=35, 341 /** Binary property Variation_Selector (new in Unicode 4.0.1). 342 Indicates all those characters that qualify as Variation Selectors. 343 For details on the behavior of these characters, 344 see StandardizedVariants.html and 15.6 Variation Selectors. 345 \xrefitem stable "Stable" "Stable List" ICU 3.0 */ 346 UCHAR_VARIATION_SELECTOR=36, 347 /** Binary property NFD_Inert. 
348 ICU-specific property for characters that are inert under NFD, 349 i.e., they do not interact with adjacent characters. 350 See the documentation for the Normalizer2 class and the 351 Normalizer2::isInert() method. 352 \xrefitem stable "Stable" "Stable List" ICU 3.0 */ 353 UCHAR_NFD_INERT=37, 354 /** Binary property NFKD_Inert. 355 ICU-specific property for characters that are inert under NFKD, 356 i.e., they do not interact with adjacent characters. 357 See the documentation for the Normalizer2 class and the 358 Normalizer2::isInert() method. 359 \xrefitem stable "Stable" "Stable List" ICU 3.0 */ 360 UCHAR_NFKD_INERT=38, 361 /** Binary property NFC_Inert. 362 ICU-specific property for characters that are inert under NFC, 363 i.e., they do not interact with adjacent characters. 364 See the documentation for the Normalizer2 class and the 365 Normalizer2::isInert() method. 366 \xrefitem stable "Stable" "Stable List" ICU 3.0 */ 367 UCHAR_NFC_INERT=39, 368 /** Binary property NFKC_Inert. 369 ICU-specific property for characters that are inert under NFKC, 370 i.e., they do not interact with adjacent characters. 371 See the documentation for the Normalizer2 class and the 372 Normalizer2::isInert() method. 373 \xrefitem stable "Stable" "Stable List" ICU 3.0 */ 374 UCHAR_NFKC_INERT=40, 375 /** Binary Property Segment_Starter. 376 ICU-specific property for characters that are starters in terms of 377 Unicode normalization and combining character sequences. 378 They have ccc=0 and do not occur in non-initial position of the 379 canonical decomposition of any character 380 (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)). 381 ICU uses this property for segmenting a string for generating a set of 382 canonically equivalent strings, e.g. for canonical closure while 383 processing collation tailoring rules. 384 \xrefitem stable "Stable" "Stable List" ICU 3.0 */ 385 UCHAR_SEGMENT_STARTER=41, 386 /** Binary property Pattern_Syntax (new in Unicode 4.1). 
387 See UAX #31 Identifier and Pattern Syntax 388 (http://www.unicode.org/reports/tr31/) 389 \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 390 UCHAR_PATTERN_SYNTAX=42, 391 /** Binary property Pattern_White_Space (new in Unicode 4.1). 392 See UAX #31 Identifier and Pattern Syntax 393 (http://www.unicode.org/reports/tr31/) 394 \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 395 UCHAR_PATTERN_WHITE_SPACE=43, 396 /** Binary property alnum (a C/POSIX character class). 397 Implemented according to the UTS #18 Annex C Standard Recommendation. 398 See the uchar.h file documentation. 399 \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 400 UCHAR_POSIX_ALNUM=44, 401 /** Binary property blank (a C/POSIX character class). 402 Implemented according to the UTS #18 Annex C Standard Recommendation. 403 See the uchar.h file documentation. 404 \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 405 UCHAR_POSIX_BLANK=45, 406 /** Binary property graph (a C/POSIX character class). 407 Implemented according to the UTS #18 Annex C Standard Recommendation. 408 See the uchar.h file documentation. 409 \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 410 UCHAR_POSIX_GRAPH=46, 411 /** Binary property print (a C/POSIX character class). 412 Implemented according to the UTS #18 Annex C Standard Recommendation. 413 See the uchar.h file documentation. 414 \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 415 UCHAR_POSIX_PRINT=47, 416 /** Binary property xdigit (a C/POSIX character class). 417 Implemented according to the UTS #18 Annex C Standard Recommendation. 418 See the uchar.h file documentation. 419 \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 420 UCHAR_POSIX_XDIGIT=48, 421 /** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 422 UCHAR_CASED=49, 423 /** Binary property Case_Ignorable. Used in context-sensitive case mappings. 
\xrefitem stable "Stable" "Stable List" ICU 4.4 */ 424 UCHAR_CASE_IGNORABLE=50, 425 /** Binary property Changes_When_Lowercased. \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 426 UCHAR_CHANGES_WHEN_LOWERCASED=51, 427 /** Binary property Changes_When_Uppercased. \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 428 UCHAR_CHANGES_WHEN_UPPERCASED=52, 429 /** Binary property Changes_When_Titlecased. \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 430 UCHAR_CHANGES_WHEN_TITLECASED=53, 431 /** Binary property Changes_When_Casefolded. \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 432 UCHAR_CHANGES_WHEN_CASEFOLDED=54, 433 /** Binary property Changes_When_Casemapped. \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 434 UCHAR_CHANGES_WHEN_CASEMAPPED=55, 435 /** Binary property Changes_When_NFKC_Casefolded. \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 436 UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56, 437 /** 438 * Binary property Emoji. 439 * See http://www.unicode.org/reports/tr51/#Emoji_Properties 440 * 441 * \xrefitem stable "Stable" "Stable List" ICU 57 442 */ 443 UCHAR_EMOJI=57, 444 /** 445 * Binary property Emoji_Presentation. 446 * See http://www.unicode.org/reports/tr51/#Emoji_Properties 447 * 448 * \xrefitem stable "Stable" "Stable List" ICU 57 449 */ 450 UCHAR_EMOJI_PRESENTATION=58, 451 /** 452 * Binary property Emoji_Modifier. 453 * See http://www.unicode.org/reports/tr51/#Emoji_Properties 454 * 455 * \xrefitem stable "Stable" "Stable List" ICU 57 456 */ 457 UCHAR_EMOJI_MODIFIER=59, 458 /** 459 * Binary property Emoji_Modifier_Base. 460 * See http://www.unicode.org/reports/tr51/#Emoji_Properties 461 * 462 * \xrefitem stable "Stable" "Stable List" ICU 57 463 */ 464 UCHAR_EMOJI_MODIFIER_BASE=60, 465 /** 466 * Binary property Emoji_Component. 467 * See http://www.unicode.org/reports/tr51/#Emoji_Properties 468 * 469 * \xrefitem stable "Stable" "Stable List" ICU 60 470 */ 471 UCHAR_EMOJI_COMPONENT=61, 472 /** 473 * Binary property Regional_Indicator. 
474 * \xrefitem stable "Stable" "Stable List" ICU 60 475 */ 476 UCHAR_REGIONAL_INDICATOR=62, 477 /** 478 * Binary property Prepended_Concatenation_Mark. 479 * \xrefitem stable "Stable" "Stable List" ICU 60 480 */ 481 UCHAR_PREPENDED_CONCATENATION_MARK=63, 482 /** 483 * Binary property Extended_Pictographic. 484 * See http://www.unicode.org/reports/tr51/#Emoji_Properties 485 * 486 * \xrefitem stable "Stable" "Stable List" ICU 62 487 */ 488 UCHAR_EXTENDED_PICTOGRAPHIC=64, 489 /** 490 * Binary property of strings Basic_Emoji. 491 * See https://www.unicode.org/reports/tr51/#Emoji_Sets 492 * 493 * \xrefitem stable "Stable" "Stable List" ICU 70 494 */ 495 UCHAR_BASIC_EMOJI=65, 496 /** 497 * Binary property of strings Emoji_Keycap_Sequence. 498 * See https://www.unicode.org/reports/tr51/#Emoji_Sets 499 * 500 * \xrefitem stable "Stable" "Stable List" ICU 70 501 */ 502 UCHAR_EMOJI_KEYCAP_SEQUENCE=66, 503 /** 504 * Binary property of strings RGI_Emoji_Modifier_Sequence. 505 * See https://www.unicode.org/reports/tr51/#Emoji_Sets 506 * 507 * \xrefitem stable "Stable" "Stable List" ICU 70 508 */ 509 UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE=67, 510 /** 511 * Binary property of strings RGI_Emoji_Flag_Sequence. 512 * See https://www.unicode.org/reports/tr51/#Emoji_Sets 513 * 514 * \xrefitem stable "Stable" "Stable List" ICU 70 515 */ 516 UCHAR_RGI_EMOJI_FLAG_SEQUENCE=68, 517 /** 518 * Binary property of strings RGI_Emoji_Tag_Sequence. 519 * See https://www.unicode.org/reports/tr51/#Emoji_Sets 520 * 521 * \xrefitem stable "Stable" "Stable List" ICU 70 522 */ 523 UCHAR_RGI_EMOJI_TAG_SEQUENCE=69, 524 /** 525 * Binary property of strings RGI_Emoji_ZWJ_Sequence. 526 * See https://www.unicode.org/reports/tr51/#Emoji_Sets 527 * 528 * \xrefitem stable "Stable" "Stable List" ICU 70 529 */ 530 UCHAR_RGI_EMOJI_ZWJ_SEQUENCE=70, 531 /** 532 * Binary property of strings RGI_Emoji. 
533 * See https://www.unicode.org/reports/tr51/#Emoji_Sets 534 * 535 * \xrefitem stable "Stable" "Stable List" ICU 70 536 */ 537 UCHAR_RGI_EMOJI=71, 538 #ifndef U_HIDE_DRAFT_API 539 /** 540 * Binary property IDS_Unary_Operator. 541 * For programmatic determination of Ideographic Description Sequences. 542 * 543 * \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 74 544 */ 545 UCHAR_IDS_UNARY_OPERATOR=72, 546 /** 547 * Binary property ID_Compat_Math_Start. 548 * Used in mathematical identifier profile in UAX #31. 549 * \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 74 550 */ 551 UCHAR_ID_COMPAT_MATH_START=73, 552 /** 553 * Binary property ID_Compat_Math_Continue. 554 * Used in mathematical identifier profile in UAX #31. 555 * \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 74 556 */ 557 UCHAR_ID_COMPAT_MATH_CONTINUE=74, 558 #endif // U_HIDE_DRAFT_API 559 #ifndef U_HIDE_DEPRECATED_API 560 /** 561 * One more than the last constant for binary Unicode properties. 562 * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420. 563 */ 564 UCHAR_BINARY_LIMIT=75, 565 #endif // U_HIDE_DEPRECATED_API 566 567 /** Enumerated property Bidi_Class. 568 Same as u_charDirection, returns UCharDirection values. \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 569 UCHAR_BIDI_CLASS=0x1000, 570 /** First constant for enumerated/integer Unicode properties. \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 571 UCHAR_INT_START=UCHAR_BIDI_CLASS, 572 /** Enumerated property Block. 573 Same as ublock_getCode, returns UBlockCode values. \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 574 UCHAR_BLOCK=0x1001, 575 /** Enumerated property Canonical_Combining_Class. 576 Same as u_getCombiningClass, returns 8-bit numeric values. 
\xrefitem stable "Stable" "Stable List" ICU 2.2 */ 577 UCHAR_CANONICAL_COMBINING_CLASS=0x1002, 578 /** Enumerated property Decomposition_Type. 579 Returns UDecompositionType values. \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 580 UCHAR_DECOMPOSITION_TYPE=0x1003, 581 /** Enumerated property East_Asian_Width. 582 See http://www.unicode.org/reports/tr11/ 583 Returns UEastAsianWidth values. \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 584 UCHAR_EAST_ASIAN_WIDTH=0x1004, 585 /** Enumerated property General_Category. 586 Same as u_charType, returns UCharCategory values. \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 587 UCHAR_GENERAL_CATEGORY=0x1005, 588 /** Enumerated property Joining_Group. 589 Returns UJoiningGroup values. \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 590 UCHAR_JOINING_GROUP=0x1006, 591 /** Enumerated property Joining_Type. 592 Returns UJoiningType values. \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 593 UCHAR_JOINING_TYPE=0x1007, 594 /** Enumerated property Line_Break. 595 Returns ULineBreak values. \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 596 UCHAR_LINE_BREAK=0x1008, 597 /** Enumerated property Numeric_Type. 598 Returns UNumericType values. \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 599 UCHAR_NUMERIC_TYPE=0x1009, 600 /** Enumerated property Script. 601 Same as uscript_getScript, returns UScriptCode values. \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 602 UCHAR_SCRIPT=0x100A, 603 /** Enumerated property Hangul_Syllable_Type, new in Unicode 4. 604 Returns UHangulSyllableType values. \xrefitem stable "Stable" "Stable List" ICU 2.6 */ 605 UCHAR_HANGUL_SYLLABLE_TYPE=0x100B, 606 /** Enumerated property NFD_Quick_Check. 607 Returns UNormalizationCheckResult values. \xrefitem stable "Stable" "Stable List" ICU 3.0 */ 608 UCHAR_NFD_QUICK_CHECK=0x100C, 609 /** Enumerated property NFKD_Quick_Check. 610 Returns UNormalizationCheckResult values. 
\xrefitem stable "Stable" "Stable List" ICU 3.0 */ 611 UCHAR_NFKD_QUICK_CHECK=0x100D, 612 /** Enumerated property NFC_Quick_Check. 613 Returns UNormalizationCheckResult values. \xrefitem stable "Stable" "Stable List" ICU 3.0 */ 614 UCHAR_NFC_QUICK_CHECK=0x100E, 615 /** Enumerated property NFKC_Quick_Check. 616 Returns UNormalizationCheckResult values. \xrefitem stable "Stable" "Stable List" ICU 3.0 */ 617 UCHAR_NFKC_QUICK_CHECK=0x100F, 618 /** Enumerated property Lead_Canonical_Combining_Class. 619 ICU-specific property for the ccc of the first code point 620 of the decomposition, or lccc(c)=ccc(NFD(c)[0]). 621 Useful for checking for canonically ordered text; 622 see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . 623 Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. \xrefitem stable "Stable" "Stable List" ICU 3.0 */ 624 UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010, 625 /** Enumerated property Trail_Canonical_Combining_Class. 626 ICU-specific property for the ccc of the last code point 627 of the decomposition, or tccc(c)=ccc(NFD(c)[last]). 628 Useful for checking for canonically ordered text; 629 see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . 630 Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. \xrefitem stable "Stable" "Stable List" ICU 3.0 */ 631 UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011, 632 /** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1). 633 Used in UAX #29: Text Boundaries 634 (http://www.unicode.org/reports/tr29/) 635 Returns UGraphemeClusterBreak values. \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 636 UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012, 637 /** Enumerated property Sentence_Break (new in Unicode 4.1). 638 Used in UAX #29: Text Boundaries 639 (http://www.unicode.org/reports/tr29/) 640 Returns USentenceBreak values. \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 641 UCHAR_SENTENCE_BREAK=0x1013, 642 /** Enumerated property Word_Break (new in Unicode 4.1). 
643 Used in UAX #29: Text Boundaries 644 (http://www.unicode.org/reports/tr29/) 645 Returns UWordBreakValues values. \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 646 UCHAR_WORD_BREAK=0x1014, 647 /** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3). 648 Used in UAX #9: Unicode Bidirectional Algorithm 649 (http://www.unicode.org/reports/tr9/) 650 Returns UBidiPairedBracketType values. \xrefitem stable "Stable" "Stable List" ICU 52 */ 651 UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015, 652 /** 653 * Enumerated property Indic_Positional_Category. 654 * New in Unicode 6.0 as provisional property Indic_Matra_Category; 655 * renamed and changed to informative in Unicode 8.0. 656 * See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt 657 * \xrefitem stable "Stable" "Stable List" ICU 63 658 */ 659 UCHAR_INDIC_POSITIONAL_CATEGORY=0x1016, 660 /** 661 * Enumerated property Indic_Syllabic_Category. 662 * New in Unicode 6.0 as provisional; informative since Unicode 8.0. 663 * See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt 664 * \xrefitem stable "Stable" "Stable List" ICU 63 665 */ 666 UCHAR_INDIC_SYLLABIC_CATEGORY=0x1017, 667 /** 668 * Enumerated property Vertical_Orientation. 669 * Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/). 670 * New as a UCD property in Unicode 10.0. 671 * \xrefitem stable "Stable" "Stable List" ICU 63 672 */ 673 UCHAR_VERTICAL_ORIENTATION=0x1018, 674 #ifndef U_HIDE_DRAFT_API 675 /** 676 * Enumerated property Identifier_Status. 677 * Used for UTS #39 General Security Profile for Identifiers 678 * (https://www.unicode.org/reports/tr39/#General_Security_Profile). 
679 * \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 75 680 */ 681 UCHAR_IDENTIFIER_STATUS=0x1019, 682 #endif // U_HIDE_DRAFT_API 683 #ifndef U_HIDE_DEPRECATED_API 684 /** 685 * One more than the last constant for enumerated/integer Unicode properties. 686 * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420. 687 */ 688 UCHAR_INT_LIMIT=0x101A, 689 #endif // U_HIDE_DEPRECATED_API 690 691 /** Bitmask property General_Category_Mask. 692 This is the General_Category property returned as a bit mask. 693 When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)), 694 returns bit masks for UCharCategory values where exactly one bit is set. 695 When used with u_getPropertyValueName() and u_getPropertyValueEnum(), 696 a multi-bit mask is used for sets of categories like "Letters". 697 Mask values should be cast to uint32_t. 698 \xrefitem stable "Stable" "Stable List" ICU 2.4 */ 699 UCHAR_GENERAL_CATEGORY_MASK=0x2000, 700 /** First constant for bit-mask Unicode properties. \xrefitem stable "Stable" "Stable List" ICU 2.4 */ 701 UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK, 702 #ifndef U_HIDE_DEPRECATED_API 703 /** 704 * One more than the last constant for bit-mask Unicode properties. 705 * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420. 706 */ 707 UCHAR_MASK_LIMIT=0x2001, 708 #endif // U_HIDE_DEPRECATED_API 709 710 /** Double property Numeric_Value. 711 Corresponds to u_getNumericValue. \xrefitem stable "Stable" "Stable List" ICU 2.4 */ 712 UCHAR_NUMERIC_VALUE=0x3000, 713 /** First constant for double Unicode properties. \xrefitem stable "Stable" "Stable List" ICU 2.4 */ 714 UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE, 715 #ifndef U_HIDE_DEPRECATED_API 716 /** 717 * One more than the last constant for double Unicode properties. 
718 * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420. 719 */ 720 UCHAR_DOUBLE_LIMIT=0x3001, 721 #endif // U_HIDE_DEPRECATED_API 722 723 /** String property Age. 724 Corresponds to u_charAge. \xrefitem stable "Stable" "Stable List" ICU 2.4 */ 725 UCHAR_AGE=0x4000, 726 /** First constant for string Unicode properties. \xrefitem stable "Stable" "Stable List" ICU 2.4 */ 727 UCHAR_STRING_START=UCHAR_AGE, 728 /** String property Bidi_Mirroring_Glyph. 729 Corresponds to u_charMirror. \xrefitem stable "Stable" "Stable List" ICU 2.4 */ 730 UCHAR_BIDI_MIRRORING_GLYPH=0x4001, 731 /** String property Case_Folding. 732 Corresponds to u_strFoldCase in ustring.h. \xrefitem stable "Stable" "Stable List" ICU 2.4 */ 733 UCHAR_CASE_FOLDING=0x4002, 734 #ifndef U_HIDE_DEPRECATED_API 735 /** Deprecated string property ISO_Comment. 736 Corresponds to u_getISOComment. \xrefitem deprecated "Deprecated" "Deprecated List" ICU 49 */ 737 UCHAR_ISO_COMMENT=0x4003, 738 #endif /* U_HIDE_DEPRECATED_API */ 739 /** String property Lowercase_Mapping. 740 Corresponds to u_strToLower in ustring.h. \xrefitem stable "Stable" "Stable List" ICU 2.4 */ 741 UCHAR_LOWERCASE_MAPPING=0x4004, 742 /** String property Name. 743 Corresponds to u_charName. \xrefitem stable "Stable" "Stable List" ICU 2.4 */ 744 UCHAR_NAME=0x4005, 745 /** String property Simple_Case_Folding. 746 Corresponds to u_foldCase. \xrefitem stable "Stable" "Stable List" ICU 2.4 */ 747 UCHAR_SIMPLE_CASE_FOLDING=0x4006, 748 /** String property Simple_Lowercase_Mapping. 749 Corresponds to u_tolower. \xrefitem stable "Stable" "Stable List" ICU 2.4 */ 750 UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007, 751 /** String property Simple_Titlecase_Mapping. 752 Corresponds to u_totitle. \xrefitem stable "Stable" "Stable List" ICU 2.4 */ 753 UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008, 754 /** String property Simple_Uppercase_Mapping. 755 Corresponds to u_toupper. 
\xrefitem stable "Stable" "Stable List" ICU 2.4 */ 756 UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009, 757 /** String property Titlecase_Mapping. 758 Corresponds to u_strToTitle in ustring.h. \xrefitem stable "Stable" "Stable List" ICU 2.4 */ 759 UCHAR_TITLECASE_MAPPING=0x400A, 760 #ifndef U_HIDE_DEPRECATED_API 761 /** String property Unicode_1_Name. 762 This property is of little practical value. 763 Beginning with ICU 49, ICU APIs return an empty string for this property. 764 Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). \xrefitem deprecated "Deprecated" "Deprecated List" ICU 49 */ 765 UCHAR_UNICODE_1_NAME=0x400B, 766 #endif /* U_HIDE_DEPRECATED_API */ 767 /** String property Uppercase_Mapping. 768 Corresponds to u_strToUpper in ustring.h. \xrefitem stable "Stable" "Stable List" ICU 2.4 */ 769 UCHAR_UPPERCASE_MAPPING=0x400C, 770 /** String property Bidi_Paired_Bracket (new in Unicode 6.3). 771 Corresponds to u_getBidiPairedBracket. \xrefitem stable "Stable" "Stable List" ICU 52 */ 772 UCHAR_BIDI_PAIRED_BRACKET=0x400D, 773 #ifndef U_HIDE_DEPRECATED_API 774 /** 775 * One more than the last constant for string Unicode properties. 776 * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420. 777 */ 778 UCHAR_STRING_LIMIT=0x400E, 779 #endif // U_HIDE_DEPRECATED_API 780 781 /** Miscellaneous property Script_Extensions (new in Unicode 6.0). 782 Some characters are commonly used in multiple scripts. 783 For more information, see UAX #24: http://www.unicode.org/reports/tr24/. 784 Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h. 785 \xrefitem stable "Stable" "Stable List" ICU 4.6 */ 786 UCHAR_SCRIPT_EXTENSIONS=0x7000, 787 /** First constant for Unicode properties with unusual value types. \xrefitem stable "Stable" "Stable List" ICU 4.6 */ 788 UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS, 789 #ifndef U_HIDE_DRAFT_API 790 /** 791 * Miscellaneous property Identifier_Type. 
* Used for UTS #39 General Security Profile for Identifiers
     * (https://www.unicode.org/reports/tr39/#General_Security_Profile).
     *
     * Corresponds to u_hasIDType() and u_getIDTypes().
     *
     * Each code point maps to a <i>set</i> of UIdentifierType values.
     *
     * @see u_hasIDType
     * @see u_getIDTypes
     * \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 75
     */
    UCHAR_IDENTIFIER_TYPE=0x7001,
#endif  // U_HIDE_DRAFT_API
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the last constant for Unicode properties with unusual value types.
     * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    UCHAR_OTHER_PROPERTY_LIMIT=0x7002,
#endif  // U_HIDE_DEPRECATED_API

    /** Represents a nonexistent or invalid property or property value. \xrefitem stable "Stable" "Stable List" ICU 2.4 */
    UCHAR_INVALID_CODE = -1
} UProperty;

/**
 * Data for enumerated Unicode general category types.
 * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .
 *
 * Except for U_UNASSIGNED, the comment on each constant begins with the
 * two-letter General_Category abbreviation that the constant stands for
 * (Lu, Ll, Lt, ...).
 * U_UNASSIGNED and U_GENERAL_OTHER_TYPES are two names for the same value 0.
 * The explicit values run from 0 to 29 without gaps.
 * The U_GC_XX_MASK macros wrap these constants in U_MASK().
 *
 * @see U_GC_CN_MASK
 * \xrefitem stable "Stable" "Stable List" ICU 2.0
 */
typedef enum UCharCategory
{
    /*
     * Note: UCharCategory constants and their API comments are parsed by preparseucd.py.
     * It matches pairs of lines like
     *     / ** <Unicode 2-letter General_Category value> comment... * /
     *     U_<[A-Z_]+> = <integer>,
     */

    /** Non-category for unassigned and non-character code points. \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_UNASSIGNED = 0,
    /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_GENERAL_OTHER_TYPES = 0,
    /** Lu \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_UPPERCASE_LETTER = 1,
    /** Ll \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_LOWERCASE_LETTER = 2,
    /** Lt \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_TITLECASE_LETTER = 3,
    /** Lm \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_MODIFIER_LETTER = 4,
    /** Lo \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_OTHER_LETTER = 5,
    /** Mn \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_NON_SPACING_MARK = 6,
    /** Me \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_ENCLOSING_MARK = 7,
    /** Mc \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_COMBINING_SPACING_MARK = 8,
    /** Nd \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_DECIMAL_DIGIT_NUMBER = 9,
    /** Nl \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_LETTER_NUMBER = 10,
    /** No \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_OTHER_NUMBER = 11,
    /** Zs \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_SPACE_SEPARATOR = 12,
    /** Zl \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_LINE_SEPARATOR = 13,
    /** Zp \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_PARAGRAPH_SEPARATOR = 14,
    /** Cc \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_CONTROL_CHAR = 15,
    /** Cf \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_FORMAT_CHAR = 16,
    /** Co \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_PRIVATE_USE_CHAR = 17,
    /** Cs \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_SURROGATE = 18,
    /** Pd \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_DASH_PUNCTUATION = 19,
    /** Ps \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_START_PUNCTUATION = 20,
    /** Pe \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_END_PUNCTUATION = 21,
    /** Pc \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_CONNECTOR_PUNCTUATION = 22,
    /** Po \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_OTHER_PUNCTUATION = 23,
    /** Sm \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_MATH_SYMBOL = 24,
    /** Sc \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_CURRENCY_SYMBOL = 25,
    /** Sk \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_MODIFIER_SYMBOL = 26,
    /** So \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_OTHER_SYMBOL = 27,
    /** Pi \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_INITIAL_PUNCTUATION = 28,
    /** Pf \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_FINAL_PUNCTUATION = 29,
    /**
     * One higher than the last enum UCharCategory constant.
     * This numeric value is stable (will not change), see
     * http://www.unicode.org/policies/stability_policy.html#Property_Value
     *
     * It has no explicit initializer, so it is one more than
     * U_FINAL_PUNCTUATION (29), i.e. 30.
     *
     * \xrefitem stable "Stable" "Stable List" ICU 2.0
     */
    U_CHAR_CATEGORY_COUNT
} UCharCategory;

/**
 * U_GC_XX_MASK constants are bit flags corresponding to Unicode
 * general category values.
 * For each category, the nth bit is set if the numeric value of the
 * corresponding UCharCategory constant is n.
 * For example, U_GC_LU_MASK is U_MASK(U_UPPERCASE_LETTER), and
 * U_UPPERCASE_LETTER has the numeric value 1.
 *
 * There are also some U_GC_Y_MASK constants for groups of general categories
 * like L for all letter categories.
 *
 * @see u_charType
 * @see U_GET_GC_MASK
 * @see UCharCategory
 * \xrefitem stable "Stable" "Stable List" ICU 2.1
 */
#define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES)

/** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */
#define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER)
/** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */
#define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER)
/** Mask constant for a UCharCategory.
\xrefitem stable "Stable" "Stable List" ICU 2.1 */ 924 #define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER) 925 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 926 #define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER) 927 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 928 #define U_GC_LO_MASK U_MASK(U_OTHER_LETTER) 929 930 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 931 #define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK) 932 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 933 #define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK) 934 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 935 #define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK) 936 937 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 938 #define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER) 939 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 940 #define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER) 941 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 942 #define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER) 943 944 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 945 #define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR) 946 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 947 #define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR) 948 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 949 #define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR) 950 951 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 952 #define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR) 953 /** Mask constant for a UCharCategory. 
\xrefitem stable "Stable" "Stable List" ICU 2.1 */ 954 #define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR) 955 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 956 #define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR) 957 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 958 #define U_GC_CS_MASK U_MASK(U_SURROGATE) 959 960 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 961 #define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION) 962 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 963 #define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION) 964 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 965 #define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION) 966 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 967 #define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION) 968 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 969 #define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION) 970 971 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 972 #define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL) 973 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 974 #define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL) 975 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 976 #define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL) 977 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 978 #define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL) 979 980 /** Mask constant for a UCharCategory. \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 981 #define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION) 982 /** Mask constant for a UCharCategory. 
\xrefitem stable "Stable" "Stable List" ICU 2.1 */ 983 #define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION) 984 985 986 /** Mask constant for multiple UCharCategory bits (L Letters). \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 987 #define U_GC_L_MASK \ 988 (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK) 989 990 /** Mask constant for multiple UCharCategory bits (LC Cased Letters). \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 991 #define U_GC_LC_MASK \ 992 (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK) 993 994 /** Mask constant for multiple UCharCategory bits (M Marks). \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 995 #define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK) 996 997 /** Mask constant for multiple UCharCategory bits (N Numbers). \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 998 #define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK) 999 1000 /** Mask constant for multiple UCharCategory bits (Z Separators). \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 1001 #define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK) 1002 1003 /** Mask constant for multiple UCharCategory bits (C Others). \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 1004 #define U_GC_C_MASK \ 1005 (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK) 1006 1007 /** Mask constant for multiple UCharCategory bits (P Punctuation). \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 1008 #define U_GC_P_MASK \ 1009 (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \ 1010 U_GC_PI_MASK|U_GC_PF_MASK) 1011 1012 /** Mask constant for multiple UCharCategory bits (S Symbols). \xrefitem stable "Stable" "Stable List" ICU 2.1 */ 1013 #define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK) 1014 1015 /** 1016 * This specifies the language directional property of a character set. 
* \xrefitem stable "Stable" "Stable List" ICU 2.0
 */
typedef enum UCharDirection {
    /*
     * Note: UCharDirection constants and their API comments are parsed by preparseucd.py.
     * It matches pairs of lines like
     *     / ** <Unicode 1..3-letter Bidi_Class value> comment... * /
     *     U_<[A-Z_]+> = <integer>,
     */

    /** L \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_LEFT_TO_RIGHT = 0,
    /** R \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_RIGHT_TO_LEFT = 1,
    /** EN \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_EUROPEAN_NUMBER = 2,
    /** ES \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_EUROPEAN_NUMBER_SEPARATOR = 3,
    /** ET \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_EUROPEAN_NUMBER_TERMINATOR = 4,
    /** AN \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_ARABIC_NUMBER = 5,
    /** CS \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_COMMON_NUMBER_SEPARATOR = 6,
    /** B \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_BLOCK_SEPARATOR = 7,
    /** S \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_SEGMENT_SEPARATOR = 8,
    /** WS \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_WHITE_SPACE_NEUTRAL = 9,
    /** ON \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_OTHER_NEUTRAL = 10,
    /** LRE \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_LEFT_TO_RIGHT_EMBEDDING = 11,
    /** LRO \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_LEFT_TO_RIGHT_OVERRIDE = 12,
    /** AL \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_RIGHT_TO_LEFT_ARABIC = 13,
    /** RLE \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_RIGHT_TO_LEFT_EMBEDDING = 14,
    /** RLO \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_RIGHT_TO_LEFT_OVERRIDE = 15,
    /** PDF \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_POP_DIRECTIONAL_FORMAT = 16,
    /** NSM \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_DIR_NON_SPACING_MARK = 17,
    /** BN \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_BOUNDARY_NEUTRAL = 18,
    /** FSI \xrefitem stable "Stable" "Stable List" ICU 52 */
    U_FIRST_STRONG_ISOLATE = 19,
    /** LRI \xrefitem stable "Stable" "Stable List" ICU 52 */
    U_LEFT_TO_RIGHT_ISOLATE = 20,
    /** RLI \xrefitem stable "Stable" "Stable List" ICU 52 */
    U_RIGHT_TO_LEFT_ISOLATE = 21,
    /** PDI \xrefitem stable "Stable" "Stable List" ICU 52 */
    U_POP_DIRECTIONAL_ISOLATE = 22,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest UCharDirection value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS).
     *
     * It has no explicit initializer, so it follows
     * U_POP_DIRECTIONAL_ISOLATE (22) and evaluates to 23.
     * It is only declared when U_HIDE_DEPRECATED_API is not defined.
     *
     * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_CHAR_DIRECTION_COUNT
#endif  // U_HIDE_DEPRECATED_API
} UCharDirection;

/**
 * Bidi Paired Bracket Type constants.
 *
 * The constants have no explicit initializers, so they are numbered
 * from 0 in declaration order.
 *
 * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
 * \xrefitem stable "Stable" "Stable List" ICU 52
 */
typedef enum UBidiPairedBracketType {
    /*
     * Note: UBidiPairedBracketType constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_BPT_<Unicode Bidi_Paired_Bracket_Type value name>
     */

    /** Not a paired bracket. \xrefitem stable "Stable" "Stable List" ICU 52 */
    U_BPT_NONE,
    /** Open paired bracket. \xrefitem stable "Stable" "Stable List" ICU 52 */
    U_BPT_OPEN,
    /** Close paired bracket. \xrefitem stable "Stable" "Stable List" ICU 52 */
    U_BPT_CLOSE,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UBidiPairedBracketType value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE).
     *
     * It is only declared when U_HIDE_DEPRECATED_API is not defined.
     *
     * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_BPT_COUNT /* 3 */
#endif  // U_HIDE_DEPRECATED_API
} UBidiPairedBracketType;

/**
 * Constants for Unicode blocks, see the Unicode Data file Blocks.txt
 * \xrefitem stable "Stable" "Stable List" ICU 2.0
 */
enum UBlockCode {
    /*
     * Note: UBlockCode constants are parsed by preparseucd.py.
     * It matches lines like
     *     UBLOCK_<Unicode Block value name> = <integer>,
     */

    /** New No_Block value in Unicode 4. \xrefitem stable "Stable" "Stable List" ICU 2.6 */
    UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */

    /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    UBLOCK_BASIC_LATIN = 1, /*[0000]*/

    /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/

    /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/

    /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/

    /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/

    /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/

    /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/

    /**
     * Unicode 3.2 renames this block to "Greek and Coptic".
1151 * \xrefitem stable "Stable" "Stable List" ICU 2.0 1152 */ 1153 UBLOCK_GREEK =8, /*[0370]*/ 1154 1155 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1156 UBLOCK_CYRILLIC =9, /*[0400]*/ 1157 1158 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1159 UBLOCK_ARMENIAN =10, /*[0530]*/ 1160 1161 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1162 UBLOCK_HEBREW =11, /*[0590]*/ 1163 1164 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1165 UBLOCK_ARABIC =12, /*[0600]*/ 1166 1167 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1168 UBLOCK_SYRIAC =13, /*[0700]*/ 1169 1170 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1171 UBLOCK_THAANA =14, /*[0780]*/ 1172 1173 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1174 UBLOCK_DEVANAGARI =15, /*[0900]*/ 1175 1176 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1177 UBLOCK_BENGALI =16, /*[0980]*/ 1178 1179 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1180 UBLOCK_GURMUKHI =17, /*[0A00]*/ 1181 1182 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1183 UBLOCK_GUJARATI =18, /*[0A80]*/ 1184 1185 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1186 UBLOCK_ORIYA =19, /*[0B00]*/ 1187 1188 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1189 UBLOCK_TAMIL =20, /*[0B80]*/ 1190 1191 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1192 UBLOCK_TELUGU =21, /*[0C00]*/ 1193 1194 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1195 UBLOCK_KANNADA =22, /*[0C80]*/ 1196 1197 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1198 UBLOCK_MALAYALAM =23, /*[0D00]*/ 1199 1200 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1201 UBLOCK_SINHALA =24, /*[0D80]*/ 1202 1203 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1204 UBLOCK_THAI =25, /*[0E00]*/ 1205 1206 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1207 UBLOCK_LAO =26, /*[0E80]*/ 1208 1209 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1210 UBLOCK_TIBETAN =27, 
/*[0F00]*/ 1211 1212 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1213 UBLOCK_MYANMAR =28, /*[1000]*/ 1214 1215 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1216 UBLOCK_GEORGIAN =29, /*[10A0]*/ 1217 1218 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1219 UBLOCK_HANGUL_JAMO =30, /*[1100]*/ 1220 1221 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1222 UBLOCK_ETHIOPIC =31, /*[1200]*/ 1223 1224 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1225 UBLOCK_CHEROKEE =32, /*[13A0]*/ 1226 1227 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1228 UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/ 1229 1230 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1231 UBLOCK_OGHAM =34, /*[1680]*/ 1232 1233 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1234 UBLOCK_RUNIC =35, /*[16A0]*/ 1235 1236 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1237 UBLOCK_KHMER =36, /*[1780]*/ 1238 1239 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1240 UBLOCK_MONGOLIAN =37, /*[1800]*/ 1241 1242 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1243 UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/ 1244 1245 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1246 UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/ 1247 1248 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1249 UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/ 1250 1251 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1252 UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/ 1253 1254 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1255 UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/ 1256 1257 /** 1258 * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols". 
1259 * \xrefitem stable "Stable" "Stable List" ICU 2.0 1260 */ 1261 UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/ 1262 1263 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1264 UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/ 1265 1266 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1267 UBLOCK_NUMBER_FORMS =45, /*[2150]*/ 1268 1269 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1270 UBLOCK_ARROWS =46, /*[2190]*/ 1271 1272 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1273 UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/ 1274 1275 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1276 UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/ 1277 1278 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1279 UBLOCK_CONTROL_PICTURES =49, /*[2400]*/ 1280 1281 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1282 UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/ 1283 1284 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1285 UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/ 1286 1287 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1288 UBLOCK_BOX_DRAWING =52, /*[2500]*/ 1289 1290 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1291 UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/ 1292 1293 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1294 UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/ 1295 1296 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1297 UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/ 1298 1299 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1300 UBLOCK_DINGBATS =56, /*[2700]*/ 1301 1302 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1303 UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/ 1304 1305 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1306 UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/ 1307 1308 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1309 UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/ 1310 1311 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1312 
UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/ 1313 1314 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1315 UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/ 1316 1317 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1318 UBLOCK_HIRAGANA =62, /*[3040]*/ 1319 1320 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1321 UBLOCK_KATAKANA =63, /*[30A0]*/ 1322 1323 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1324 UBLOCK_BOPOMOFO =64, /*[3100]*/ 1325 1326 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1327 UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/ 1328 1329 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1330 UBLOCK_KANBUN =66, /*[3190]*/ 1331 1332 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1333 UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/ 1334 1335 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1336 UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/ 1337 1338 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1339 UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/ 1340 1341 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1342 UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/ 1343 1344 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1345 UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/ 1346 1347 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1348 UBLOCK_YI_SYLLABLES =72, /*[A000]*/ 1349 1350 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1351 UBLOCK_YI_RADICALS =73, /*[A490]*/ 1352 1353 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1354 UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/ 1355 1356 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1357 UBLOCK_HIGH_SURROGATES =75, /*[D800]*/ 1358 1359 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1360 UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/ 1361 1362 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1363 UBLOCK_LOW_SURROGATES =77, /*[DC00]*/ 1364 1365 /** 1366 * Same as 
UBLOCK_PRIVATE_USE. 1367 * Until Unicode 3.1.1, the corresponding block name was "Private Use", 1368 * and multiple code point ranges had this block. 1369 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and 1370 * adds separate blocks for the supplementary PUAs. 1371 * 1372 * \xrefitem stable "Stable" "Stable List" ICU 2.0 1373 */ 1374 UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/ 1375 /** 1376 * Same as UBLOCK_PRIVATE_USE_AREA. 1377 * Until Unicode 3.1.1, the corresponding block name was "Private Use", 1378 * and multiple code point ranges had this block. 1379 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and 1380 * adds separate blocks for the supplementary PUAs. 1381 * 1382 * \xrefitem stable "Stable" "Stable List" ICU 2.0 1383 */ 1384 UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA, 1385 1386 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1387 UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/ 1388 1389 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1390 UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/ 1391 1392 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1393 UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/ 1394 1395 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1396 UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/ 1397 1398 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1399 UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/ 1400 1401 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1402 UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/ 1403 1404 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1405 UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/ 1406 1407 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1408 UBLOCK_SPECIALS =86, /*[FFF0]*/ 1409 1410 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1411 UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/ 1412 1413 /* New blocks in Unicode 3.1 */ 1414 1415 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 
1416 UBLOCK_OLD_ITALIC = 88, /*[10300]*/ 1417 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1418 UBLOCK_GOTHIC = 89, /*[10330]*/ 1419 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1420 UBLOCK_DESERET = 90, /*[10400]*/ 1421 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1422 UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, /*[1D000]*/ 1423 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1424 UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/ 1425 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1426 UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, /*[1D400]*/ 1427 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1428 UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, /*[20000]*/ 1429 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1430 UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, /*[2F800]*/ 1431 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1432 UBLOCK_TAGS = 96, /*[E0000]*/ 1433 1434 /* New blocks in Unicode 3.2 */ 1435 1436 /** \xrefitem stable "Stable" "Stable List" ICU 3.0 */ 1437 UBLOCK_CYRILLIC_SUPPLEMENT = 97, /*[0500]*/ 1438 /** 1439 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 
1440 * \xrefitem stable "Stable" "Stable List" ICU 2.2 1441 */ 1442 UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT, 1443 /** \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 1444 UBLOCK_TAGALOG = 98, /*[1700]*/ 1445 /** \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 1446 UBLOCK_HANUNOO = 99, /*[1720]*/ 1447 /** \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 1448 UBLOCK_BUHID = 100, /*[1740]*/ 1449 /** \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 1450 UBLOCK_TAGBANWA = 101, /*[1760]*/ 1451 /** \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 1452 UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/ 1453 /** \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 1454 UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/ 1455 /** \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 1456 UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/ 1457 /** \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 1458 UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/ 1459 /** \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 1460 UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/ 1461 /** \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 1462 UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/ 1463 /** \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 1464 UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/ 1465 /** \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 1466 UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/ 1467 /** \xrefitem stable "Stable" "Stable List" ICU 2.2 */ 1468 UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/ 1469 1470 /* New blocks in Unicode 4 */ 1471 1472 /** \xrefitem stable "Stable" "Stable List" ICU 2.6 */ 1473 UBLOCK_LIMBU = 111, /*[1900]*/ 1474 /** \xrefitem stable "Stable" "Stable List" ICU 2.6 */ 1475 UBLOCK_TAI_LE = 112, /*[1950]*/ 1476 /** \xrefitem stable "Stable" "Stable List" ICU 2.6 */ 1477 UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/ 1478 /** \xrefitem stable "Stable" "Stable List" ICU 2.6 
*/ 1479 UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/ 1480 /** \xrefitem stable "Stable" "Stable List" ICU 2.6 */ 1481 UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/ 1482 /** \xrefitem stable "Stable" "Stable List" ICU 2.6 */ 1483 UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/ 1484 /** \xrefitem stable "Stable" "Stable List" ICU 2.6 */ 1485 UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/ 1486 /** \xrefitem stable "Stable" "Stable List" ICU 2.6 */ 1487 UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/ 1488 /** \xrefitem stable "Stable" "Stable List" ICU 2.6 */ 1489 UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/ 1490 /** \xrefitem stable "Stable" "Stable List" ICU 2.6 */ 1491 UBLOCK_UGARITIC = 120, /*[10380]*/ 1492 /** \xrefitem stable "Stable" "Stable List" ICU 2.6 */ 1493 UBLOCK_SHAVIAN = 121, /*[10450]*/ 1494 /** \xrefitem stable "Stable" "Stable List" ICU 2.6 */ 1495 UBLOCK_OSMANYA = 122, /*[10480]*/ 1496 /** \xrefitem stable "Stable" "Stable List" ICU 2.6 */ 1497 UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/ 1498 /** \xrefitem stable "Stable" "Stable List" ICU 2.6 */ 1499 UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/ 1500 /** \xrefitem stable "Stable" "Stable List" ICU 2.6 */ 1501 UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/ 1502 1503 /* New blocks in Unicode 4.1 */ 1504 1505 /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 1506 UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, /*[1D200]*/ 1507 /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 1508 UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/ 1509 /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 1510 UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/ 1511 /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 1512 UBLOCK_BUGINESE = 129, /*[1A00]*/ 1513 /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 1514 UBLOCK_CJK_STROKES = 130, /*[31C0]*/ 1515 /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 1516 UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, /*[1DC0]*/ 1517 /** \xrefitem 
stable "Stable" "Stable List" ICU 3.4 */ 1518 UBLOCK_COPTIC = 132, /*[2C80]*/ 1519 /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 1520 UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/ 1521 /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 1522 UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/ 1523 /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 1524 UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/ 1525 /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 1526 UBLOCK_GLAGOLITIC = 136, /*[2C00]*/ 1527 /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 1528 UBLOCK_KHAROSHTHI = 137, /*[10A00]*/ 1529 /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 1530 UBLOCK_MODIFIER_TONE_LETTERS = 138, /*[A700]*/ 1531 /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 1532 UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/ 1533 /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 1534 UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/ 1535 /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 1536 UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, /*[1D80]*/ 1537 /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 1538 UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142, /*[2E00]*/ 1539 /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 1540 UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/ 1541 /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 1542 UBLOCK_TIFINAGH = 144, /*[2D30]*/ 1543 /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */ 1544 UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/ 1545 1546 /* New blocks in Unicode 5.0 */ 1547 1548 /** \xrefitem stable "Stable" "Stable List" ICU 3.6 */ 1549 UBLOCK_NKO = 146, /*[07C0]*/ 1550 /** \xrefitem stable "Stable" "Stable List" ICU 3.6 */ 1551 UBLOCK_BALINESE = 147, /*[1B00]*/ 1552 /** \xrefitem stable "Stable" "Stable List" ICU 3.6 */ 1553 UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/ 1554 /** \xrefitem stable "Stable" "Stable List" ICU 3.6 */ 1555 UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/ 1556 /** \xrefitem stable "Stable" "Stable List" ICU 3.6 */ 1557 
UBLOCK_PHAGS_PA = 150, /*[A840]*/ 1558 /** \xrefitem stable "Stable" "Stable List" ICU 3.6 */ 1559 UBLOCK_PHOENICIAN = 151, /*[10900]*/ 1560 /** \xrefitem stable "Stable" "Stable List" ICU 3.6 */ 1561 UBLOCK_CUNEIFORM = 152, /*[12000]*/ 1562 /** \xrefitem stable "Stable" "Stable List" ICU 3.6 */ 1563 UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153, /*[12400]*/ 1564 /** \xrefitem stable "Stable" "Stable List" ICU 3.6 */ 1565 UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/ 1566 1567 /* New blocks in Unicode 5.1 */ 1568 1569 /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */ 1570 UBLOCK_SUNDANESE = 155, /*[1B80]*/ 1571 /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */ 1572 UBLOCK_LEPCHA = 156, /*[1C00]*/ 1573 /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */ 1574 UBLOCK_OL_CHIKI = 157, /*[1C50]*/ 1575 /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */ 1576 UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/ 1577 /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */ 1578 UBLOCK_VAI = 159, /*[A500]*/ 1579 /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */ 1580 UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/ 1581 /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */ 1582 UBLOCK_SAURASHTRA = 161, /*[A880]*/ 1583 /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */ 1584 UBLOCK_KAYAH_LI = 162, /*[A900]*/ 1585 /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */ 1586 UBLOCK_REJANG = 163, /*[A930]*/ 1587 /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */ 1588 UBLOCK_CHAM = 164, /*[AA00]*/ 1589 /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */ 1590 UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/ 1591 /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */ 1592 UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/ 1593 /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */ 1594 UBLOCK_LYCIAN = 167, /*[10280]*/ 1595 /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */ 1596 UBLOCK_CARIAN = 168, /*[102A0]*/ 1597 /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */ 1598 
UBLOCK_LYDIAN = 169, /*[10920]*/ 1599 /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */ 1600 UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/ 1601 /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */ 1602 UBLOCK_DOMINO_TILES = 171, /*[1F030]*/ 1603 1604 /* New blocks in Unicode 5.2 */ 1605 1606 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1607 UBLOCK_SAMARITAN = 172, /*[0800]*/ 1608 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1609 UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, /*[18B0]*/ 1610 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1611 UBLOCK_TAI_THAM = 174, /*[1A20]*/ 1612 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1613 UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/ 1614 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1615 UBLOCK_LISU = 176, /*[A4D0]*/ 1616 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1617 UBLOCK_BAMUM = 177, /*[A6A0]*/ 1618 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1619 UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178, /*[A830]*/ 1620 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1621 UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/ 1622 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1623 UBLOCK_HANGUL_JAMO_EXTENDED_A = 180, /*[A960]*/ 1624 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1625 UBLOCK_JAVANESE = 181, /*[A980]*/ 1626 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1627 UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/ 1628 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1629 UBLOCK_TAI_VIET = 183, /*[AA80]*/ 1630 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1631 UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/ 1632 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1633 UBLOCK_HANGUL_JAMO_EXTENDED_B = 185, /*[D7B0]*/ 1634 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1635 UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/ 1636 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1637 UBLOCK_OLD_SOUTH_ARABIAN = 187, 
/*[10A60]*/ 1638 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1639 UBLOCK_AVESTAN = 188, /*[10B00]*/ 1640 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1641 UBLOCK_INSCRIPTIONAL_PARTHIAN = 189, /*[10B40]*/ 1642 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1643 UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/ 1644 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1645 UBLOCK_OLD_TURKIC = 191, /*[10C00]*/ 1646 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1647 UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/ 1648 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1649 UBLOCK_KAITHI = 193, /*[11080]*/ 1650 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1651 UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/ 1652 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1653 UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195, /*[1F100]*/ 1654 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1655 UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196, /*[1F200]*/ 1656 /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */ 1657 UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197, /*[2A700]*/ 1658 1659 /* New blocks in Unicode 6.0 */ 1660 1661 /** \xrefitem stable "Stable" "Stable List" ICU 4.6 */ 1662 UBLOCK_MANDAIC = 198, /*[0840]*/ 1663 /** \xrefitem stable "Stable" "Stable List" ICU 4.6 */ 1664 UBLOCK_BATAK = 199, /*[1BC0]*/ 1665 /** \xrefitem stable "Stable" "Stable List" ICU 4.6 */ 1666 UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/ 1667 /** \xrefitem stable "Stable" "Stable List" ICU 4.6 */ 1668 UBLOCK_BRAHMI = 201, /*[11000]*/ 1669 /** \xrefitem stable "Stable" "Stable List" ICU 4.6 */ 1670 UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/ 1671 /** \xrefitem stable "Stable" "Stable List" ICU 4.6 */ 1672 UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/ 1673 /** \xrefitem stable "Stable" "Stable List" ICU 4.6 */ 1674 UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/ 1675 /** \xrefitem stable "Stable" "Stable List" ICU 4.6 */ 1676 
UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205, /*[1F300]*/ 1677 /** \xrefitem stable "Stable" "Stable List" ICU 4.6 */ 1678 UBLOCK_EMOTICONS = 206, /*[1F600]*/ 1679 /** \xrefitem stable "Stable" "Stable List" ICU 4.6 */ 1680 UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207, /*[1F680]*/ 1681 /** \xrefitem stable "Stable" "Stable List" ICU 4.6 */ 1682 UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/ 1683 /** \xrefitem stable "Stable" "Stable List" ICU 4.6 */ 1684 UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209, /*[2B740]*/ 1685 1686 /* New blocks in Unicode 6.1 */ 1687 1688 /** \xrefitem stable "Stable" "Stable List" ICU 49 */ 1689 UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/ 1690 /** \xrefitem stable "Stable" "Stable List" ICU 49 */ 1691 UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211, /*[1EE00]*/ 1692 /** \xrefitem stable "Stable" "Stable List" ICU 49 */ 1693 UBLOCK_CHAKMA = 212, /*[11100]*/ 1694 /** \xrefitem stable "Stable" "Stable List" ICU 49 */ 1695 UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213, /*[AAE0]*/ 1696 /** \xrefitem stable "Stable" "Stable List" ICU 49 */ 1697 UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/ 1698 /** \xrefitem stable "Stable" "Stable List" ICU 49 */ 1699 UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/ 1700 /** \xrefitem stable "Stable" "Stable List" ICU 49 */ 1701 UBLOCK_MIAO = 216, /*[16F00]*/ 1702 /** \xrefitem stable "Stable" "Stable List" ICU 49 */ 1703 UBLOCK_SHARADA = 217, /*[11180]*/ 1704 /** \xrefitem stable "Stable" "Stable List" ICU 49 */ 1705 UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/ 1706 /** \xrefitem stable "Stable" "Stable List" ICU 49 */ 1707 UBLOCK_SUNDANESE_SUPPLEMENT = 219, /*[1CC0]*/ 1708 /** \xrefitem stable "Stable" "Stable List" ICU 49 */ 1709 UBLOCK_TAKRI = 220, /*[11680]*/ 1710 1711 /* New blocks in Unicode 7.0 */ 1712 1713 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1714 UBLOCK_BASSA_VAH = 221, /*[16AD0]*/ 1715 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1716 UBLOCK_CAUCASIAN_ALBANIAN = 222, /*[10530]*/ 1717 /** 
\xrefitem stable "Stable" "Stable List" ICU 54 */ 1718 UBLOCK_COPTIC_EPACT_NUMBERS = 223, /*[102E0]*/ 1719 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1720 UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED = 224, /*[1AB0]*/ 1721 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1722 UBLOCK_DUPLOYAN = 225, /*[1BC00]*/ 1723 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1724 UBLOCK_ELBASAN = 226, /*[10500]*/ 1725 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1726 UBLOCK_GEOMETRIC_SHAPES_EXTENDED = 227, /*[1F780]*/ 1727 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1728 UBLOCK_GRANTHA = 228, /*[11300]*/ 1729 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1730 UBLOCK_KHOJKI = 229, /*[11200]*/ 1731 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1732 UBLOCK_KHUDAWADI = 230, /*[112B0]*/ 1733 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1734 UBLOCK_LATIN_EXTENDED_E = 231, /*[AB30]*/ 1735 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1736 UBLOCK_LINEAR_A = 232, /*[10600]*/ 1737 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1738 UBLOCK_MAHAJANI = 233, /*[11150]*/ 1739 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1740 UBLOCK_MANICHAEAN = 234, /*[10AC0]*/ 1741 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1742 UBLOCK_MENDE_KIKAKUI = 235, /*[1E800]*/ 1743 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1744 UBLOCK_MODI = 236, /*[11600]*/ 1745 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1746 UBLOCK_MRO = 237, /*[16A40]*/ 1747 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1748 UBLOCK_MYANMAR_EXTENDED_B = 238, /*[A9E0]*/ 1749 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1750 UBLOCK_NABATAEAN = 239, /*[10880]*/ 1751 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1752 UBLOCK_OLD_NORTH_ARABIAN = 240, /*[10A80]*/ 1753 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1754 UBLOCK_OLD_PERMIC = 241, /*[10350]*/ 1755 /** \xrefitem stable "Stable" "Stable List" 
ICU 54 */ 1756 UBLOCK_ORNAMENTAL_DINGBATS = 242, /*[1F650]*/ 1757 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1758 UBLOCK_PAHAWH_HMONG = 243, /*[16B00]*/ 1759 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1760 UBLOCK_PALMYRENE = 244, /*[10860]*/ 1761 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1762 UBLOCK_PAU_CIN_HAU = 245, /*[11AC0]*/ 1763 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1764 UBLOCK_PSALTER_PAHLAVI = 246, /*[10B80]*/ 1765 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1766 UBLOCK_SHORTHAND_FORMAT_CONTROLS = 247, /*[1BCA0]*/ 1767 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1768 UBLOCK_SIDDHAM = 248, /*[11580]*/ 1769 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1770 UBLOCK_SINHALA_ARCHAIC_NUMBERS = 249, /*[111E0]*/ 1771 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1772 UBLOCK_SUPPLEMENTAL_ARROWS_C = 250, /*[1F800]*/ 1773 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1774 UBLOCK_TIRHUTA = 251, /*[11480]*/ 1775 /** \xrefitem stable "Stable" "Stable List" ICU 54 */ 1776 UBLOCK_WARANG_CITI = 252, /*[118A0]*/ 1777 1778 /* New blocks in Unicode 8.0 */ 1779 1780 /** \xrefitem stable "Stable" "Stable List" ICU 56 */ 1781 UBLOCK_AHOM = 253, /*[11700]*/ 1782 /** \xrefitem stable "Stable" "Stable List" ICU 56 */ 1783 UBLOCK_ANATOLIAN_HIEROGLYPHS = 254, /*[14400]*/ 1784 /** \xrefitem stable "Stable" "Stable List" ICU 56 */ 1785 UBLOCK_CHEROKEE_SUPPLEMENT = 255, /*[AB70]*/ 1786 /** \xrefitem stable "Stable" "Stable List" ICU 56 */ 1787 UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 256, /*[2B820]*/ 1788 /** \xrefitem stable "Stable" "Stable List" ICU 56 */ 1789 UBLOCK_EARLY_DYNASTIC_CUNEIFORM = 257, /*[12480]*/ 1790 /** \xrefitem stable "Stable" "Stable List" ICU 56 */ 1791 UBLOCK_HATRAN = 258, /*[108E0]*/ 1792 /** \xrefitem stable "Stable" "Stable List" ICU 56 */ 1793 UBLOCK_MULTANI = 259, /*[11280]*/ 1794 /** \xrefitem stable "Stable" "Stable List" ICU 56 */ 1795 UBLOCK_OLD_HUNGARIAN = 
260, /*[10C80]*/ 1796 /** \xrefitem stable "Stable" "Stable List" ICU 56 */ 1797 UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 261, /*[1F900]*/ 1798 /** \xrefitem stable "Stable" "Stable List" ICU 56 */ 1799 UBLOCK_SUTTON_SIGNWRITING = 262, /*[1D800]*/ 1800 1801 /* New blocks in Unicode 9.0 */ 1802 1803 /** \xrefitem stable "Stable" "Stable List" ICU 58 */ 1804 UBLOCK_ADLAM = 263, /*[1E900]*/ 1805 /** \xrefitem stable "Stable" "Stable List" ICU 58 */ 1806 UBLOCK_BHAIKSUKI = 264, /*[11C00]*/ 1807 /** \xrefitem stable "Stable" "Stable List" ICU 58 */ 1808 UBLOCK_CYRILLIC_EXTENDED_C = 265, /*[1C80]*/ 1809 /** \xrefitem stable "Stable" "Stable List" ICU 58 */ 1810 UBLOCK_GLAGOLITIC_SUPPLEMENT = 266, /*[1E000]*/ 1811 /** \xrefitem stable "Stable" "Stable List" ICU 58 */ 1812 UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 267, /*[16FE0]*/ 1813 /** \xrefitem stable "Stable" "Stable List" ICU 58 */ 1814 UBLOCK_MARCHEN = 268, /*[11C70]*/ 1815 /** \xrefitem stable "Stable" "Stable List" ICU 58 */ 1816 UBLOCK_MONGOLIAN_SUPPLEMENT = 269, /*[11660]*/ 1817 /** \xrefitem stable "Stable" "Stable List" ICU 58 */ 1818 UBLOCK_NEWA = 270, /*[11400]*/ 1819 /** \xrefitem stable "Stable" "Stable List" ICU 58 */ 1820 UBLOCK_OSAGE = 271, /*[104B0]*/ 1821 /** \xrefitem stable "Stable" "Stable List" ICU 58 */ 1822 UBLOCK_TANGUT = 272, /*[17000]*/ 1823 /** \xrefitem stable "Stable" "Stable List" ICU 58 */ 1824 UBLOCK_TANGUT_COMPONENTS = 273, /*[18800]*/ 1825 1826 // New blocks in Unicode 10.0 1827 1828 /** \xrefitem stable "Stable" "Stable List" ICU 60 */ 1829 UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 274, /*[2CEB0]*/ 1830 /** \xrefitem stable "Stable" "Stable List" ICU 60 */ 1831 UBLOCK_KANA_EXTENDED_A = 275, /*[1B100]*/ 1832 /** \xrefitem stable "Stable" "Stable List" ICU 60 */ 1833 UBLOCK_MASARAM_GONDI = 276, /*[11D00]*/ 1834 /** \xrefitem stable "Stable" "Stable List" ICU 60 */ 1835 UBLOCK_NUSHU = 277, /*[1B170]*/ 1836 /** \xrefitem stable "Stable" "Stable List" ICU 60 */ 1837 
UBLOCK_SOYOMBO = 278, /*[11A50]*/ 1838 /** \xrefitem stable "Stable" "Stable List" ICU 60 */ 1839 UBLOCK_SYRIAC_SUPPLEMENT = 279, /*[0860]*/ 1840 /** \xrefitem stable "Stable" "Stable List" ICU 60 */ 1841 UBLOCK_ZANABAZAR_SQUARE = 280, /*[11A00]*/ 1842 1843 // New blocks in Unicode 11.0 1844 1845 /** \xrefitem stable "Stable" "Stable List" ICU 62 */ 1846 UBLOCK_CHESS_SYMBOLS = 281, /*[1FA00]*/ 1847 /** \xrefitem stable "Stable" "Stable List" ICU 62 */ 1848 UBLOCK_DOGRA = 282, /*[11800]*/ 1849 /** \xrefitem stable "Stable" "Stable List" ICU 62 */ 1850 UBLOCK_GEORGIAN_EXTENDED = 283, /*[1C90]*/ 1851 /** \xrefitem stable "Stable" "Stable List" ICU 62 */ 1852 UBLOCK_GUNJALA_GONDI = 284, /*[11D60]*/ 1853 /** \xrefitem stable "Stable" "Stable List" ICU 62 */ 1854 UBLOCK_HANIFI_ROHINGYA = 285, /*[10D00]*/ 1855 /** \xrefitem stable "Stable" "Stable List" ICU 62 */ 1856 UBLOCK_INDIC_SIYAQ_NUMBERS = 286, /*[1EC70]*/ 1857 /** \xrefitem stable "Stable" "Stable List" ICU 62 */ 1858 UBLOCK_MAKASAR = 287, /*[11EE0]*/ 1859 /** \xrefitem stable "Stable" "Stable List" ICU 62 */ 1860 UBLOCK_MAYAN_NUMERALS = 288, /*[1D2E0]*/ 1861 /** \xrefitem stable "Stable" "Stable List" ICU 62 */ 1862 UBLOCK_MEDEFAIDRIN = 289, /*[16E40]*/ 1863 /** \xrefitem stable "Stable" "Stable List" ICU 62 */ 1864 UBLOCK_OLD_SOGDIAN = 290, /*[10F00]*/ 1865 /** \xrefitem stable "Stable" "Stable List" ICU 62 */ 1866 UBLOCK_SOGDIAN = 291, /*[10F30]*/ 1867 1868 // New blocks in Unicode 12.0 1869 1870 /** \xrefitem stable "Stable" "Stable List" ICU 64 */ 1871 UBLOCK_EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 292, /*[13430]*/ 1872 /** \xrefitem stable "Stable" "Stable List" ICU 64 */ 1873 UBLOCK_ELYMAIC = 293, /*[10FE0]*/ 1874 /** \xrefitem stable "Stable" "Stable List" ICU 64 */ 1875 UBLOCK_NANDINAGARI = 294, /*[119A0]*/ 1876 /** \xrefitem stable "Stable" "Stable List" ICU 64 */ 1877 UBLOCK_NYIAKENG_PUACHUE_HMONG = 295, /*[1E100]*/ 1878 /** \xrefitem stable "Stable" "Stable List" ICU 64 */ 1879 
UBLOCK_OTTOMAN_SIYAQ_NUMBERS = 296, /*[1ED00]*/ 1880 /** \xrefitem stable "Stable" "Stable List" ICU 64 */ 1881 UBLOCK_SMALL_KANA_EXTENSION = 297, /*[1B130]*/ 1882 /** \xrefitem stable "Stable" "Stable List" ICU 64 */ 1883 UBLOCK_SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 298, /*[1FA70]*/ 1884 /** \xrefitem stable "Stable" "Stable List" ICU 64 */ 1885 UBLOCK_TAMIL_SUPPLEMENT = 299, /*[11FC0]*/ 1886 /** \xrefitem stable "Stable" "Stable List" ICU 64 */ 1887 UBLOCK_WANCHO = 300, /*[1E2C0]*/ 1888 1889 // New blocks in Unicode 13.0 1890 1891 /** \xrefitem stable "Stable" "Stable List" ICU 66 */ 1892 UBLOCK_CHORASMIAN = 301, /*[10FB0]*/ 1893 /** \xrefitem stable "Stable" "Stable List" ICU 66 */ 1894 UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G = 302, /*[30000]*/ 1895 /** \xrefitem stable "Stable" "Stable List" ICU 66 */ 1896 UBLOCK_DIVES_AKURU = 303, /*[11900]*/ 1897 /** \xrefitem stable "Stable" "Stable List" ICU 66 */ 1898 UBLOCK_KHITAN_SMALL_SCRIPT = 304, /*[18B00]*/ 1899 /** \xrefitem stable "Stable" "Stable List" ICU 66 */ 1900 UBLOCK_LISU_SUPPLEMENT = 305, /*[11FB0]*/ 1901 /** \xrefitem stable "Stable" "Stable List" ICU 66 */ 1902 UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING = 306, /*[1FB00]*/ 1903 /** \xrefitem stable "Stable" "Stable List" ICU 66 */ 1904 UBLOCK_TANGUT_SUPPLEMENT = 307, /*[18D00]*/ 1905 /** \xrefitem stable "Stable" "Stable List" ICU 66 */ 1906 UBLOCK_YEZIDI = 308, /*[10E80]*/ 1907 1908 // New blocks in Unicode 14.0 1909 1910 /** \xrefitem stable "Stable" "Stable List" ICU 70 */ 1911 UBLOCK_ARABIC_EXTENDED_B = 309, /*[0870]*/ 1912 /** \xrefitem stable "Stable" "Stable List" ICU 70 */ 1913 UBLOCK_CYPRO_MINOAN = 310, /*[12F90]*/ 1914 /** \xrefitem stable "Stable" "Stable List" ICU 70 */ 1915 UBLOCK_ETHIOPIC_EXTENDED_B = 311, /*[1E7E0]*/ 1916 /** \xrefitem stable "Stable" "Stable List" ICU 70 */ 1917 UBLOCK_KANA_EXTENDED_B = 312, /*[1AFF0]*/ 1918 /** \xrefitem stable "Stable" "Stable List" ICU 70 */ 1919 UBLOCK_LATIN_EXTENDED_F = 313, /*[10780]*/ 1920 /** \xrefitem 
stable "Stable" "Stable List" ICU 70 */ 1921 UBLOCK_LATIN_EXTENDED_G = 314, /*[1DF00]*/ 1922 /** \xrefitem stable "Stable" "Stable List" ICU 70 */ 1923 UBLOCK_OLD_UYGHUR = 315, /*[10F70]*/ 1924 /** \xrefitem stable "Stable" "Stable List" ICU 70 */ 1925 UBLOCK_TANGSA = 316, /*[16A70]*/ 1926 /** \xrefitem stable "Stable" "Stable List" ICU 70 */ 1927 UBLOCK_TOTO = 317, /*[1E290]*/ 1928 /** \xrefitem stable "Stable" "Stable List" ICU 70 */ 1929 UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A = 318, /*[11AB0]*/ 1930 /** \xrefitem stable "Stable" "Stable List" ICU 70 */ 1931 UBLOCK_VITHKUQI = 319, /*[10570]*/ 1932 /** \xrefitem stable "Stable" "Stable List" ICU 70 */ 1933 UBLOCK_ZNAMENNY_MUSICAL_NOTATION = 320, /*[1CF00]*/ 1934 1935 // New blocks in Unicode 15.0 1936 1937 /** \xrefitem stable "Stable" "Stable List" ICU 72 */ 1938 UBLOCK_ARABIC_EXTENDED_C = 321, /*[10EC0]*/ 1939 /** \xrefitem stable "Stable" "Stable List" ICU 72 */ 1940 UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H = 322, /*[31350]*/ 1941 /** \xrefitem stable "Stable" "Stable List" ICU 72 */ 1942 UBLOCK_CYRILLIC_EXTENDED_D = 323, /*[1E030]*/ 1943 /** \xrefitem stable "Stable" "Stable List" ICU 72 */ 1944 UBLOCK_DEVANAGARI_EXTENDED_A = 324, /*[11B00]*/ 1945 /** \xrefitem stable "Stable" "Stable List" ICU 72 */ 1946 UBLOCK_KAKTOVIK_NUMERALS = 325, /*[1D2C0]*/ 1947 /** \xrefitem stable "Stable" "Stable List" ICU 72 */ 1948 UBLOCK_KAWI = 326, /*[11F00]*/ 1949 /** \xrefitem stable "Stable" "Stable List" ICU 72 */ 1950 UBLOCK_NAG_MUNDARI = 327, /*[1E4D0]*/ 1951 1952 // New block in Unicode 15.1 1953 1954 /** \xrefitem stable "Stable" "Stable List" ICU 74 */ 1955 UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I = 328, /*[2EBF0]*/ 1956 1957 #ifndef U_HIDE_DEPRECATED_API 1958 /** 1959 * One more than the highest normal UBlockCode value. 1960 * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BLOCK). 
1961 * 1962 * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420. 1963 */ 1964 UBLOCK_COUNT = 329, 1965 #endif // U_HIDE_DEPRECATED_API 1966 1967 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1968 UBLOCK_INVALID_CODE=-1 1969 }; 1970 1971 /** \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 1972 typedef enum UBlockCode UBlockCode; 1973 1974 /** 1975 * East Asian Width constants. 1976 * 1977 * @see UCHAR_EAST_ASIAN_WIDTH 1978 * @see u_getIntPropertyValue 1979 * \xrefitem stable "Stable" "Stable List" ICU 2.2 1980 */ 1981 typedef enum UEastAsianWidth { 1982 /* 1983 * Note: UEastAsianWidth constants are parsed by preparseucd.py. 1984 * It matches lines like 1985 * U_EA_<Unicode East_Asian_Width value name> 1986 */ 1987 1988 U_EA_NEUTRAL, /*[N]*/ 1989 U_EA_AMBIGUOUS, /*[A]*/ 1990 U_EA_HALFWIDTH, /*[H]*/ 1991 U_EA_FULLWIDTH, /*[F]*/ 1992 U_EA_NARROW, /*[Na]*/ 1993 U_EA_WIDE, /*[W]*/ 1994 #ifndef U_HIDE_DEPRECATED_API 1995 /** 1996 * One more than the highest normal UEastAsianWidth value. 1997 * The highest value is available via u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH). 1998 * 1999 * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420. 2000 */ 2001 U_EA_COUNT 2002 #endif // U_HIDE_DEPRECATED_API 2003 } UEastAsianWidth; 2004 2005 /** 2006 * Selector constants for u_charName(). 2007 * u_charName() returns the "modern" name of a 2008 * Unicode character; or the name that was defined in 2009 * Unicode version 1.0, before the Unicode standard merged 2010 * with ISO-10646; or an "extended" name that gives each 2011 * Unicode code point a unique name. 2012 * 2013 * @see u_charName 2014 * \xrefitem stable "Stable" "Stable List" ICU 2.0 2015 */ 2016 typedef enum UCharNameChoice { 2017 /** Unicode character name (Name property). 
\xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_UNICODE_CHAR_NAME,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * The Unicode_1_Name property value which is of little practical value.
     * Beginning with ICU 49, ICU APIs return an empty string for this name choice.
     * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 49
     */
    U_UNICODE_10_CHAR_NAME,
#endif  /* U_HIDE_DEPRECATED_API */
    /** Standard or synthetic character name. \xrefitem stable "Stable" "Stable List" ICU 2.0 */
    U_EXTENDED_CHAR_NAME = U_UNICODE_CHAR_NAME+2,
    /** Corrected name from NameAliases.txt. \xrefitem stable "Stable" "Stable List" ICU 4.4 */
    U_CHAR_NAME_ALIAS,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UCharNameChoice value.
     * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_CHAR_NAME_CHOICE_COUNT
#endif  // U_HIDE_DEPRECATED_API
} UCharNameChoice;

/**
 * Selector constants for u_getPropertyName() and
 * u_getPropertyValueName().  These selectors are used to choose which
 * name is returned for a given property or value.  All properties and
 * values have a long name.  Most have a short name, but some do not.
 * Unicode allows for additional names, beyond the long and short
 * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where
 * i=1, 2,...
 *
 * The constants have no explicit initializers, so they are numbered from 0
 * in declaration order (short name first, long name second).
 * The trailing deprecated COUNT constant is compiled only when
 * U_HIDE_DEPRECATED_API is not defined.
 *
 * @see u_getPropertyName()
 * @see u_getPropertyValueName()
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
typedef enum UPropertyNameChoice {
    /** Selects the short name; per the description above, some properties and values have none. */
    U_SHORT_PROPERTY_NAME,
    /** Selects the long name, which every property and value has. */
    U_LONG_PROPERTY_NAME,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UPropertyNameChoice value.
     * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_PROPERTY_NAME_CHOICE_COUNT
#endif  // U_HIDE_DEPRECATED_API
} UPropertyNameChoice;

/**
 * Decomposition Type constants.
 *
 * The constants have no explicit initializers, so they are numbered from 0
 * in declaration order; inserting or reordering entries renumbers every
 * constant after the change.
 * The trailing deprecated COUNT constant is compiled only when
 * U_HIDE_DEPRECATED_API is not defined.
 *
 * @see UCHAR_DECOMPOSITION_TYPE
 * \xrefitem stable "Stable" "Stable List" ICU 2.2
 */
typedef enum UDecompositionType {
    /*
     * Note: UDecompositionType constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_DT_<Unicode Decomposition_Type value name>
     */

    U_DT_NONE,              /*[none]*/
    U_DT_CANONICAL,         /*[can]*/
    U_DT_COMPAT,            /*[com]*/
    U_DT_CIRCLE,            /*[enc]*/
    U_DT_FINAL,             /*[fin]*/
    U_DT_FONT,              /*[font]*/
    U_DT_FRACTION,          /*[fra]*/
    U_DT_INITIAL,           /*[init]*/
    U_DT_ISOLATED,          /*[iso]*/
    U_DT_MEDIAL,            /*[med]*/
    U_DT_NARROW,            /*[nar]*/
    U_DT_NOBREAK,           /*[nb]*/
    U_DT_SMALL,             /*[sml]*/
    U_DT_SQUARE,            /*[sqr]*/
    U_DT_SUB,               /*[sub]*/
    U_DT_SUPER,             /*[sup]*/
    U_DT_VERTICAL,          /*[vert]*/
    U_DT_WIDE,              /*[wide]*/
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UDecompositionType value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE).
     *
     * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_DT_COUNT /* 18 */
#endif  // U_HIDE_DEPRECATED_API
} UDecompositionType;

/**
 * Joining Type constants.
 *
 * The constants have no explicit initializers, so they are numbered from 0
 * in declaration order.
 * The trailing deprecated COUNT constant is compiled only when
 * U_HIDE_DEPRECATED_API is not defined.
 *
 * @see UCHAR_JOINING_TYPE
 * \xrefitem stable "Stable" "Stable List" ICU 2.2
 */
typedef enum UJoiningType {
    /*
     * Note: UJoiningType constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_JT_<Unicode Joining_Type value name>
     */

    U_JT_NON_JOINING,       /*[U]*/
    U_JT_JOIN_CAUSING,      /*[C]*/
    U_JT_DUAL_JOINING,      /*[D]*/
    U_JT_LEFT_JOINING,      /*[L]*/
    U_JT_RIGHT_JOINING,     /*[R]*/
    U_JT_TRANSPARENT,       /*[T]*/
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UJoiningType value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE).
     *
     * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_JT_COUNT /* 6 */
#endif  // U_HIDE_DEPRECATED_API
} UJoiningType;

/**
 * Joining Group constants.
 *
 * The constants are numbered from 0 in declaration order. The single entry
 * with an initializer is an alias that repeats the value of the constant
 * directly before it; numbering continues from that shared value.
 * The trailing deprecated COUNT constant is compiled only when
 * U_HIDE_DEPRECATED_API is not defined.
 *
 * @see UCHAR_JOINING_GROUP
 * \xrefitem stable "Stable" "Stable List" ICU 2.2
 */
typedef enum UJoiningGroup {
    /*
     * Note: UJoiningGroup constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_JG_<Unicode Joining_Group value name>
     */

    U_JG_NO_JOINING_GROUP,
    U_JG_AIN,
    U_JG_ALAPH,
    U_JG_ALEF,
    U_JG_BEH,
    U_JG_BETH,
    U_JG_DAL,
    U_JG_DALATH_RISH,
    U_JG_E,
    U_JG_FEH,
    U_JG_FINAL_SEMKATH,
    U_JG_GAF,
    U_JG_GAMAL,
    U_JG_HAH,
    U_JG_TEH_MARBUTA_GOAL,  /**< \xrefitem stable "Stable" "Stable List" ICU 4.6 */
    U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL,
    U_JG_HE,
    U_JG_HEH,
    U_JG_HEH_GOAL,
    U_JG_HETH,
    U_JG_KAF,
    U_JG_KAPH,
    U_JG_KNOTTED_HEH,
    U_JG_LAM,
    U_JG_LAMADH,
    U_JG_MEEM,
    U_JG_MIM,
    U_JG_NOON,
    U_JG_NUN,
    U_JG_PE,
    U_JG_QAF,
    U_JG_QAPH,
    U_JG_REH,
    U_JG_REVERSED_PE,
    U_JG_SAD,
    U_JG_SADHE,
    U_JG_SEEN,
    U_JG_SEMKATH,
    U_JG_SHIN,
    U_JG_SWASH_KAF,
    U_JG_SYRIAC_WAW,
    U_JG_TAH,
    U_JG_TAW,
    U_JG_TEH_MARBUTA,
    U_JG_TETH,
    U_JG_WAW,
    U_JG_YEH,
    U_JG_YEH_BARREE,
    U_JG_YEH_WITH_TAIL,
    U_JG_YUDH,
    U_JG_YUDH_HE,
    U_JG_ZAIN,
    U_JG_FE,        /**< \xrefitem stable "Stable" "Stable List" ICU 2.6 */
    U_JG_KHAPH,     /**< \xrefitem stable "Stable" "Stable List" ICU 2.6 */
    U_JG_ZHAIN,     /**< \xrefitem stable "Stable" "Stable List" ICU 2.6 */
    U_JG_BURUSHASKI_YEH_BARREE, /**< \xrefitem stable "Stable" "Stable List" ICU 4.0 */
    U_JG_FARSI_YEH, /**< \xrefitem stable "Stable" "Stable List" ICU 4.4 */
    U_JG_NYA,       /**< \xrefitem stable "Stable" "Stable List" ICU 4.4 */
    U_JG_ROHINGYA_YEH,  /**< \xrefitem stable "Stable" "Stable List" ICU 49 */
    U_JG_MANICHAEAN_ALEPH,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_AYIN,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_BETH,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_DALETH,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_DHAMEDH,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_FIVE,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_GIMEL,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_HETH,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_HUNDRED,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_KAPH,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_LAMEDH,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_MEM,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_NUN,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_ONE,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_PE,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_QOPH,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_RESH,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_SADHE,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_SAMEKH,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_TAW,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_TEN,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_TETH,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_THAMEDH,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_TWENTY,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_WAW,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_YODH,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_MANICHAEAN_ZAYIN,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_STRAIGHT_WAW,  /**< \xrefitem stable "Stable" "Stable List" ICU 54 */
    U_JG_AFRICAN_FEH,  /**< \xrefitem stable "Stable" "Stable List" ICU 58 */
    U_JG_AFRICAN_NOON,  /**< \xrefitem stable "Stable" "Stable List" ICU 58 */
    U_JG_AFRICAN_QAF,  /**< \xrefitem stable "Stable" "Stable List" ICU 58 */

    U_JG_MALAYALAM_BHA,  /**< \xrefitem stable "Stable" "Stable List" ICU 60 */
    U_JG_MALAYALAM_JA,  /**< \xrefitem stable "Stable" "Stable List" ICU 60 */
    U_JG_MALAYALAM_LLA,  /**< \xrefitem stable "Stable" "Stable List" ICU 60 */
    U_JG_MALAYALAM_LLLA,  /**< \xrefitem stable "Stable" "Stable List" ICU 60 */
    U_JG_MALAYALAM_NGA,  /**< \xrefitem stable "Stable" "Stable List" ICU 60 */
    U_JG_MALAYALAM_NNA,  /**< \xrefitem stable "Stable" "Stable List" ICU 60 */
    U_JG_MALAYALAM_NNNA,  /**< \xrefitem stable "Stable" "Stable List" ICU 60 */
    U_JG_MALAYALAM_NYA,  /**< \xrefitem stable "Stable" "Stable List" ICU 60 */
    U_JG_MALAYALAM_RA,  /**< \xrefitem stable "Stable" "Stable List" ICU 60 */
    U_JG_MALAYALAM_SSA,  /**< \xrefitem stable "Stable" "Stable List" ICU 60 */
    U_JG_MALAYALAM_TTA,  /**< \xrefitem stable "Stable" "Stable List" ICU 60 */

    U_JG_HANIFI_ROHINGYA_KINNA_YA,  /**< \xrefitem stable "Stable" "Stable List" ICU 62 */
    U_JG_HANIFI_ROHINGYA_PA,  /**< \xrefitem stable "Stable" "Stable List" ICU 62 */

    U_JG_THIN_YEH,  /**< \xrefitem stable "Stable" "Stable List" ICU 70 */
    U_JG_VERTICAL_TAIL,  /**< \xrefitem stable "Stable" "Stable List" ICU 70 */

#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UJoiningGroup value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP).
     *
     * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_JG_COUNT
#endif  // U_HIDE_DEPRECATED_API
} UJoiningGroup;

/**
 * Grapheme Cluster Break constants.
 *
 * Every constant is given an explicit value, so the numbering does not
 * depend on declaration order.
 * The trailing deprecated COUNT constant is compiled only when
 * U_HIDE_DEPRECATED_API is not defined.
 *
 * @see UCHAR_GRAPHEME_CLUSTER_BREAK
 * \xrefitem stable "Stable" "Stable List" ICU 3.4
 */
typedef enum UGraphemeClusterBreak {
    /*
     * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_GCB_<Unicode Grapheme_Cluster_Break value name>
     */

    U_GCB_OTHER = 0,            /*[XX]*/
    U_GCB_CONTROL = 1,          /*[CN]*/
    U_GCB_CR = 2,               /*[CR]*/
    U_GCB_EXTEND = 3,           /*[EX]*/
    U_GCB_L = 4,                /*[L]*/
    U_GCB_LF = 5,               /*[LF]*/
    U_GCB_LV = 6,               /*[LV]*/
    U_GCB_LVT = 7,              /*[LVT]*/
    U_GCB_T = 8,                /*[T]*/
    U_GCB_V = 9,                /*[V]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */
    U_GCB_SPACING_MARK = 10,    /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
    /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */
    U_GCB_PREPEND = 11,         /*[PP]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 50 */
    U_GCB_REGIONAL_INDICATOR = 12,  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
    /** \xrefitem stable "Stable" "Stable List" ICU 58 */
    U_GCB_E_BASE = 13,          /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
    /** \xrefitem stable "Stable" "Stable List" ICU 58 */
    U_GCB_E_BASE_GAZ = 14,      /*[EBG]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 58 */
    U_GCB_E_MODIFIER = 15,      /*[EM]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 58 */
    U_GCB_GLUE_AFTER_ZWJ = 16,  /*[GAZ]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 58 */
    U_GCB_ZWJ = 17,             /*[ZWJ]*/

#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UGraphemeClusterBreak value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK).
     *
     * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_GCB_COUNT = 18
#endif  // U_HIDE_DEPRECATED_API
} UGraphemeClusterBreak;

/**
 * Word Break constants.
 * (UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.)
 *
 * Every constant is given an explicit value, so the numbering does not
 * depend on declaration order.
 * The trailing deprecated COUNT constant is compiled only when
 * U_HIDE_DEPRECATED_API is not defined.
 *
 * @see UCHAR_WORD_BREAK
 * \xrefitem stable "Stable" "Stable List" ICU 3.4
 */
typedef enum UWordBreakValues {
    /*
     * Note: UWordBreakValues constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_WB_<Unicode Word_Break value name>
     */

    U_WB_OTHER = 0,             /*[XX]*/
    U_WB_ALETTER = 1,           /*[LE]*/
    U_WB_FORMAT = 2,            /*[FO]*/
    U_WB_KATAKANA = 3,          /*[KA]*/
    U_WB_MIDLETTER = 4,         /*[ML]*/
    U_WB_MIDNUM = 5,            /*[MN]*/
    U_WB_NUMERIC = 6,           /*[NU]*/
    U_WB_EXTENDNUMLET = 7,      /*[EX]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */
    U_WB_CR = 8,                /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
    /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */
    U_WB_EXTEND = 9,            /*[Extend]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */
    U_WB_LF = 10,               /*[LF]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */
    U_WB_MIDNUMLET =11,         /*[MB]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 4.0 */
    U_WB_NEWLINE =12,           /*[NL]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 50 */
    U_WB_REGIONAL_INDICATOR = 13,   /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
    /** \xrefitem stable "Stable" "Stable List" ICU 52 */
    U_WB_HEBREW_LETTER = 14,    /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
    /** \xrefitem stable "Stable" "Stable List" ICU 52 */
    U_WB_SINGLE_QUOTE = 15,     /*[SQ]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 52 */
    U_WB_DOUBLE_QUOTE = 16,     /*[DQ]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 58 */
    U_WB_E_BASE = 17,           /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
    /** \xrefitem stable "Stable" "Stable List" ICU 58 */
    U_WB_E_BASE_GAZ = 18,       /*[EBG]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 58 */
    U_WB_E_MODIFIER = 19,       /*[EM]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 58 */
    U_WB_GLUE_AFTER_ZWJ = 20,   /*[GAZ]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 58 */
    U_WB_ZWJ = 21,              /*[ZWJ]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 62 */
    U_WB_WSEGSPACE = 22,        /*[WSEGSPACE]*/

#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UWordBreakValues value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_WORD_BREAK).
     *
     * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_WB_COUNT = 23
#endif  // U_HIDE_DEPRECATED_API
} UWordBreakValues;

/**
 * Sentence Break constants.
 *
 * Every constant is given an explicit value, so the numbering does not
 * depend on declaration order.
 * The trailing deprecated COUNT constant is compiled only when
 * U_HIDE_DEPRECATED_API is not defined.
 *
 * @see UCHAR_SENTENCE_BREAK
 * \xrefitem stable "Stable" "Stable List" ICU 3.4
 */
typedef enum USentenceBreak {
    /*
     * Note: USentenceBreak constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_SB_<Unicode Sentence_Break value name>
     */

    U_SB_OTHER = 0,             /*[XX]*/
    U_SB_ATERM = 1,             /*[AT]*/
    U_SB_CLOSE = 2,             /*[CL]*/
    U_SB_FORMAT = 3,            /*[FO]*/
    U_SB_LOWER = 4,             /*[LO]*/
    U_SB_NUMERIC = 5,           /*[NU]*/
    U_SB_OLETTER = 6,           /*[LE]*/
    U_SB_SEP = 7,               /*[SE]*/
    U_SB_SP = 8,                /*[SP]*/
    U_SB_STERM = 9,             /*[ST]*/
    U_SB_UPPER = 10,            /*[UP]*/
    U_SB_CR = 11,               /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
    U_SB_EXTEND = 12,           /*[EX]*/
    U_SB_LF = 13,               /*[LF]*/
    U_SB_SCONTINUE = 14,        /*[SC]*/
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal USentenceBreak value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK).
     *
     * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_SB_COUNT = 15
#endif  // U_HIDE_DEPRECATED_API
} USentenceBreak;

/**
 * Line Break constants.
 *
 * Every constant is given an explicit value; one misspelled legacy name is
 * kept as an alias with the same value as its corrected spelling.
 * The trailing deprecated COUNT constant is compiled only when
 * U_HIDE_DEPRECATED_API is not defined.
 *
 * @see UCHAR_LINE_BREAK
 * \xrefitem stable "Stable" "Stable List" ICU 2.2
 */
typedef enum ULineBreak {
    /*
     * Note: ULineBreak constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_LB_<Unicode Line_Break value name>
     */

    U_LB_UNKNOWN = 0,           /*[XX]*/
    U_LB_AMBIGUOUS = 1,         /*[AI]*/
    U_LB_ALPHABETIC = 2,        /*[AL]*/
    U_LB_BREAK_BOTH = 3,        /*[B2]*/
    U_LB_BREAK_AFTER = 4,       /*[BA]*/
    U_LB_BREAK_BEFORE = 5,      /*[BB]*/
    U_LB_MANDATORY_BREAK = 6,   /*[BK]*/
    U_LB_CONTINGENT_BREAK = 7,  /*[CB]*/
    U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/
    U_LB_COMBINING_MARK = 9,    /*[CM]*/
    U_LB_CARRIAGE_RETURN = 10,   /*[CR]*/
    U_LB_EXCLAMATION = 11,       /*[EX]*/
    U_LB_GLUE = 12,              /*[GL]*/
    U_LB_HYPHEN = 13,            /*[HY]*/
    U_LB_IDEOGRAPHIC = 14,       /*[ID]*/
    /** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 \xrefitem stable "Stable" "Stable List" ICU 3.0 */
    U_LB_INSEPARABLE = 15,       /*[IN]*/
    U_LB_INSEPERABLE = U_LB_INSEPARABLE,
    U_LB_INFIX_NUMERIC = 16,     /*[IS]*/
    U_LB_LINE_FEED = 17,         /*[LF]*/
    U_LB_NONSTARTER = 18,        /*[NS]*/
    U_LB_NUMERIC = 19,           /*[NU]*/
    U_LB_OPEN_PUNCTUATION = 20,  /*[OP]*/
    U_LB_POSTFIX_NUMERIC = 21,   /*[PO]*/
    U_LB_PREFIX_NUMERIC = 22,    /*[PR]*/
    U_LB_QUOTATION = 23,         /*[QU]*/
    U_LB_COMPLEX_CONTEXT = 24,   /*[SA]*/
    U_LB_SURROGATE = 25,         /*[SG]*/
    U_LB_SPACE = 26,             /*[SP]*/
    U_LB_BREAK_SYMBOLS = 27,     /*[SY]*/
    U_LB_ZWSPACE = 28,           /*[ZW]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 2.6 */
    U_LB_NEXT_LINE = 29,         /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
    /** \xrefitem stable "Stable" "Stable List" ICU 2.6 */
    U_LB_WORD_JOINER = 30,       /*[WJ]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */
    U_LB_H2 = 31,                /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */
    /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */
    U_LB_H3 = 32,                /*[H3]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */
    U_LB_JL = 33,                /*[JL]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */
    U_LB_JT = 34,                /*[JT]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 3.4 */
    U_LB_JV = 35,                /*[JV]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 4.4 */
    U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
    /** \xrefitem stable "Stable" "Stable List" ICU 49 */
    U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
    /** \xrefitem stable "Stable" "Stable List" ICU 49 */
    U_LB_HEBREW_LETTER = 38,     /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
    /** \xrefitem stable "Stable" "Stable List" ICU 50 */
    U_LB_REGIONAL_INDICATOR = 39,/*[RI]*/ /* new in Unicode 6.2/ICU 50 */
    /** \xrefitem stable "Stable" "Stable List" ICU 58 */
    U_LB_E_BASE = 40,            /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
    /** \xrefitem stable "Stable" "Stable List" ICU 58 */
    U_LB_E_MODIFIER = 41,        /*[EM]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 58 */
    U_LB_ZWJ = 42,               /*[ZWJ]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 74 */
    U_LB_AKSARA = 43,            /*[AK]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 74 */
    U_LB_AKSARA_PREBASE = 44,    /*[AP]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 74 */
    U_LB_AKSARA_START = 45,      /*[AS]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 74 */
    U_LB_VIRAMA_FINAL = 46,      /*[VF]*/
    /** \xrefitem stable "Stable" "Stable List" ICU 74 */
    U_LB_VIRAMA = 47,            /*[VI]*/
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal ULineBreak value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_LINE_BREAK).
     *
     * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_LB_COUNT = 48
#endif  // U_HIDE_DEPRECATED_API
} ULineBreak;

/**
 * Numeric Type constants.
 *
 * The constants have no explicit initializers, so they are numbered from 0
 * in declaration order.
 * The trailing deprecated COUNT constant is compiled only when
 * U_HIDE_DEPRECATED_API is not defined.
 *
 * @see UCHAR_NUMERIC_TYPE
 * \xrefitem stable "Stable" "Stable List" ICU 2.2
 */
typedef enum UNumericType {
    /*
     * Note: UNumericType constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_NT_<Unicode Numeric_Type value name>
     */

    U_NT_NONE,              /*[None]*/
    U_NT_DECIMAL,           /*[de]*/
    U_NT_DIGIT,             /*[di]*/
    U_NT_NUMERIC,           /*[nu]*/
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UNumericType value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE).
     *
     * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_NT_COUNT
#endif  // U_HIDE_DEPRECATED_API
} UNumericType;

/**
 * Hangul Syllable Type constants.
 *
 * The constants have no explicit initializers, so they are numbered from 0
 * in declaration order.
 * The trailing deprecated COUNT constant is compiled only when
 * U_HIDE_DEPRECATED_API is not defined.
 *
 * @see UCHAR_HANGUL_SYLLABLE_TYPE
 * \xrefitem stable "Stable" "Stable List" ICU 2.6
 */
typedef enum UHangulSyllableType {
    /*
     * Note: UHangulSyllableType constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_HST_<Unicode Hangul_Syllable_Type value name>
     */

    U_HST_NOT_APPLICABLE,   /*[NA]*/
    U_HST_LEADING_JAMO,     /*[L]*/
    U_HST_VOWEL_JAMO,       /*[V]*/
    U_HST_TRAILING_JAMO,    /*[T]*/
    U_HST_LV_SYLLABLE,      /*[LV]*/
    U_HST_LVT_SYLLABLE,     /*[LVT]*/
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UHangulSyllableType value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE).
     *
     * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_HST_COUNT
#endif  // U_HIDE_DEPRECATED_API
} UHangulSyllableType;

/**
 * Indic Positional Category constants.
 *
 * The constants have no explicit initializers, so they are numbered from 0
 * in declaration order; this type declares no COUNT sentinel.
 * The last constant carries a later ICU version tag (66) than the others (63)
 * and sits after them rather than in alphabetical position
 * (presumably so that the earlier values stay unchanged; confirm before reordering).
 *
 * @see UCHAR_INDIC_POSITIONAL_CATEGORY
 * \xrefitem stable "Stable" "Stable List" ICU 63
 */
typedef enum UIndicPositionalCategory {
    /*
     * Note: UIndicPositionalCategory constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_INPC_<Unicode Indic_Positional_Category value name>
     */

    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INPC_NA,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INPC_BOTTOM,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INPC_BOTTOM_AND_LEFT,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INPC_BOTTOM_AND_RIGHT,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INPC_LEFT,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INPC_LEFT_AND_RIGHT,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INPC_OVERSTRUCK,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INPC_RIGHT,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INPC_TOP,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INPC_TOP_AND_BOTTOM,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INPC_TOP_AND_BOTTOM_AND_RIGHT,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INPC_TOP_AND_LEFT,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INPC_TOP_AND_LEFT_AND_RIGHT,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INPC_TOP_AND_RIGHT,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INPC_VISUAL_ORDER_LEFT,
    /** \xrefitem stable "Stable" "Stable List" ICU 66 */
    U_INPC_TOP_AND_BOTTOM_AND_LEFT,
} UIndicPositionalCategory;

/**
 * Indic Syllabic Category constants.
 *
 * The constants have no explicit initializers, so they are numbered from 0
 * in declaration order; this type declares no COUNT sentinel.
 *
 * @see UCHAR_INDIC_SYLLABIC_CATEGORY
 * \xrefitem stable "Stable" "Stable List" ICU 63
 */
typedef enum UIndicSyllabicCategory {
    /*
     * Note: UIndicSyllabicCategory constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_INSC_<Unicode Indic_Syllabic_Category value name>
     */

    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_OTHER,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_AVAGRAHA,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_BINDU,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_BRAHMI_JOINING_NUMBER,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_CANTILLATION_MARK,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_CONSONANT,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_CONSONANT_DEAD,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_CONSONANT_FINAL,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_CONSONANT_HEAD_LETTER,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_CONSONANT_INITIAL_POSTFIXED,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_CONSONANT_KILLER,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_CONSONANT_MEDIAL,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_CONSONANT_PLACEHOLDER,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_CONSONANT_PRECEDING_REPHA,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_CONSONANT_PREFIXED,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_CONSONANT_SUBJOINED,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_CONSONANT_SUCCEEDING_REPHA,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_CONSONANT_WITH_STACKER,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_GEMINATION_MARK,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_INVISIBLE_STACKER,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_JOINER,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_MODIFYING_LETTER,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_NON_JOINER,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_NUKTA,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_NUMBER,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_NUMBER_JOINER,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_PURE_KILLER,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_REGISTER_SHIFTER,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_SYLLABLE_MODIFIER,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_TONE_LETTER,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_TONE_MARK,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_VIRAMA,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_VISARGA,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_VOWEL,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_VOWEL_DEPENDENT,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_INSC_VOWEL_INDEPENDENT,
} UIndicSyllabicCategory;

/**
 * Vertical Orientation constants.
 *
 * The constants have no explicit initializers, so they are numbered from 0
 * in declaration order; this type declares no COUNT sentinel.
 *
 * @see UCHAR_VERTICAL_ORIENTATION
 * \xrefitem stable "Stable" "Stable List" ICU 63
 */
typedef enum UVerticalOrientation {
    /*
     * Note: UVerticalOrientation constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_VO_<Unicode Vertical_Orientation value name>
     */

    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_VO_ROTATED,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_VO_TRANSFORMED_ROTATED,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_VO_TRANSFORMED_UPRIGHT,
    /** \xrefitem stable "Stable" "Stable List" ICU 63 */
    U_VO_UPRIGHT,
} UVerticalOrientation;

#ifndef U_HIDE_DRAFT_API
/**
 * Identifier Status constants.
 * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type.
 *
 * Declared only when U_HIDE_DRAFT_API is not defined.
 * The constants have no explicit initializers, so they are numbered from 0
 * in declaration order.
 *
 * @see UCHAR_IDENTIFIER_STATUS
 * \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 75
 */
typedef enum UIdentifierStatus {
    /*
     * Note: UIdentifierStatus constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_ID_STATUS_<Unicode Identifier_Status value name>
     */

    /** \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 75 */
    U_ID_STATUS_RESTRICTED,
    /** \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 75 */
    U_ID_STATUS_ALLOWED,
} UIdentifierStatus;

/**
 * Identifier Type constants.
 * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type.
 *
 * Declared only when U_HIDE_DRAFT_API is not defined.
 * The constants have no explicit initializers, so they are numbered from 0
 * in declaration order.
 *
 * @see UCHAR_IDENTIFIER_TYPE
 * \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 75
 */
typedef enum UIdentifierType {
    /*
     * Note: UIdentifierType constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_ID_TYPE_<Unicode Identifier_Type value name>
     */

    /** \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 75 */
    U_ID_TYPE_NOT_CHARACTER,
    /** \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 75 */
    U_ID_TYPE_DEPRECATED,
    /** \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 75 */
    U_ID_TYPE_DEFAULT_IGNORABLE,
    /** \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 75 */
    U_ID_TYPE_NOT_NFKC,
    /** \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 75 */
    U_ID_TYPE_NOT_XID,
    /** \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 75 */
    U_ID_TYPE_EXCLUSION,
    /** \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 75 */
    U_ID_TYPE_OBSOLETE,
    /** \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 75 */
    U_ID_TYPE_TECHNICAL,
    /** \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 75 */
    U_ID_TYPE_UNCOMMON_USE,
    /** \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 75 */
    U_ID_TYPE_LIMITED_USE,
    /** \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 75 */
    U_ID_TYPE_INCLUSION,
    /** \xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in ICU 75 */
    U_ID_TYPE_RECOMMENDED,
} UIdentifierType;
#endif  // U_HIDE_DRAFT_API

/**
 * Check a binary Unicode property for a code point.
 *
 * Unicode, especially in version 3.2, defines many more properties than the
 * original set in UnicodeData.txt.
 *
 * The properties APIs are intended to reflect Unicode properties as defined
 * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
 * For details about the properties see http://www.unicode.org/ucd/ .
 * For names of Unicode properties see the UCD file PropertyAliases.txt.
 *
 * Important: If ICU is built with UCD files from Unicode versions below 3.2,
 * then properties marked with "new in Unicode 3.2" are either unavailable
 * or only partially available.
 *
 * @param c Code point to test.
 * @param which UProperty selector constant, identifies which binary property to check.
 *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT.
 * @return true or false according to the binary Unicode property value for c.
 *         Also false if 'which' is out of bounds or if the Unicode version
 *         does not have data for the property at all.
 *
 * @see UProperty
 * @see u_getBinaryPropertySet
 * @see u_getIntPropertyValue
 * @see u_getUnicodeVersion
 * \xrefitem stable "Stable" "Stable List" ICU 2.1
 */
U_CAPI UBool U_EXPORT2
u_hasBinaryProperty(UChar32 c, UProperty which) __INTRODUCED_IN(31);



/**
 * Check if a code point has the Alphabetic Unicode property.
 * Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC).
 * This is different from u_isalpha!
 * @param c Code point to test.
 * @return true if the code point has the Alphabetic Unicode property, false otherwise.
 *
 * @see UCHAR_ALPHABETIC
 * @see u_isalpha
 * @see u_hasBinaryProperty
 * \xrefitem stable "Stable" "Stable List" ICU 2.1
 */
U_CAPI UBool U_EXPORT2
u_isUAlphabetic(UChar32 c) __INTRODUCED_IN(31);



/**
 * Check if a code point has the Lowercase Unicode property.
 * Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE).
 * This is different from u_islower!
 * @param c Code point to test.
 * @return true if the code point has the Lowercase Unicode property, false otherwise.
 *
 * @see UCHAR_LOWERCASE
 * @see u_islower
 * @see u_hasBinaryProperty
 * \xrefitem stable "Stable" "Stable List" ICU 2.1
 */
U_CAPI UBool U_EXPORT2
u_isULowercase(UChar32 c) __INTRODUCED_IN(31);



/**
 * Check if a code point has the Uppercase Unicode property.
 * Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE).
 * This is different from u_isupper!
 * @param c Code point to test.
 * @return true if the code point has the Uppercase Unicode property, false otherwise.
 *
 * @see UCHAR_UPPERCASE
 * @see u_isupper
 * @see u_hasBinaryProperty
 * \xrefitem stable "Stable" "Stable List" ICU 2.1
 */
U_CAPI UBool U_EXPORT2
u_isUUppercase(UChar32 c) __INTRODUCED_IN(31);



/**
 * Check if a code point has the White_Space Unicode property.
 * Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE).
 * This is different from both u_isspace and u_isWhitespace!
 *
 * Note: There are several ICU whitespace functions; please see the uchar.h
 * file documentation for a detailed comparison.
 *
 * @param c Code point to test.
 * @return true if the code point has the White_Space Unicode property, false otherwise.
 *
 * @see UCHAR_WHITE_SPACE
 * @see u_isWhitespace
 * @see u_isspace
 * @see u_isJavaSpaceChar
 * @see u_hasBinaryProperty
 * \xrefitem stable "Stable" "Stable List" ICU 2.1
 */
U_CAPI UBool U_EXPORT2
u_isUWhiteSpace(UChar32 c) __INTRODUCED_IN(31);



/**
 * Get the property value for an enumerated or integer Unicode property for a code point.
 * Also returns binary and mask property values.
 *
 * Unicode, especially in version 3.2, defines many more properties than the
 * original set in UnicodeData.txt.
 *
 * The properties APIs are intended to reflect Unicode properties as defined
 * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
 * For details about the properties see http://www.unicode.org/ucd/ .
 * For names of Unicode properties see the UCD file PropertyAliases.txt.
 *
 * Sample usage:
 *     UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH);
 *     UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC);
 *
 * @param c Code point to test.
 * @param which UProperty selector constant, identifies which property to check.
 *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
 *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
 *        or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
 * @return Numeric value that is directly the property value or,
 *         for enumerated properties, corresponds to the numeric value of the enumerated
 *         constant of the respective property value enumeration type
 *         (cast to enum type if necessary).
 *         Returns 0 or 1 (for false/true) for binary Unicode properties.
 *         Returns a bit-mask for mask properties.
 *         Returns 0 if 'which' is out of bounds or if the Unicode version
 *         does not have data for the property at all, or not for this code point.
2933 * 2934 * @see UProperty 2935 * @see u_hasBinaryProperty 2936 * @see u_getIntPropertyMinValue 2937 * @see u_getIntPropertyMaxValue 2938 * @see u_getIntPropertyMap 2939 * @see u_getUnicodeVersion 2940 * \xrefitem stable "Stable" "Stable List" ICU 2.2 2941 */ 2942 U_CAPI int32_t U_EXPORT2 2943 u_getIntPropertyValue(UChar32 c, UProperty which) __INTRODUCED_IN(31); 2944 2945 2946 2947 /** 2948 * Get the minimum value for an enumerated/integer/binary Unicode property. 2949 * Can be used together with u_getIntPropertyMaxValue 2950 * to allocate arrays of UnicodeSet or similar. 2951 * 2952 * @param which UProperty selector constant, identifies which property to check. 2953 * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT 2954 * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT. 2955 * @return Minimum value returned by u_getIntPropertyValue for a Unicode property. 2956 * 0 if the property selector is out of range. 2957 * 2958 * @see UProperty 2959 * @see u_hasBinaryProperty 2960 * @see u_getUnicodeVersion 2961 * @see u_getIntPropertyMaxValue 2962 * @see u_getIntPropertyValue 2963 * \xrefitem stable "Stable" "Stable List" ICU 2.2 2964 */ 2965 U_CAPI int32_t U_EXPORT2 2966 u_getIntPropertyMinValue(UProperty which) __INTRODUCED_IN(31); 2967 2968 2969 2970 /** 2971 * Get the maximum value for an enumerated/integer/binary Unicode property. 2972 * Can be used together with u_getIntPropertyMinValue 2973 * to allocate arrays of UnicodeSet or similar. 2974 * 2975 * Examples for min/max values (for Unicode 3.2): 2976 * 2977 * - UCHAR_BIDI_CLASS: 0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL) 2978 * - UCHAR_SCRIPT: 0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA) 2979 * - UCHAR_IDEOGRAPHIC: 0/1 (false/true) 2980 * 2981 * For undefined UProperty constant values, min/max values will be 0/-1. 2982 * 2983 * @param which UProperty selector constant, identifies which property to check. 
2984 * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT 2985 * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT. 2986 * @return Maximum value returned by u_getIntPropertyValue for a Unicode property. 2987 * <=0 if the property selector is out of range. 2988 * 2989 * @see UProperty 2990 * @see u_hasBinaryProperty 2991 * @see u_getUnicodeVersion 2992 * @see u_getIntPropertyMinValue 2993 * @see u_getIntPropertyValue 2994 * \xrefitem stable "Stable" "Stable List" ICU 2.2 2995 */ 2996 U_CAPI int32_t U_EXPORT2 2997 u_getIntPropertyMaxValue(UProperty which) __INTRODUCED_IN(31); 2998 2999 3000 3001 3002 3003 /** 3004 * Get the numeric value for a Unicode code point as defined in the 3005 * Unicode Character Database. 3006 * 3007 * A "double" return type is necessary because 3008 * some numeric values are fractions, negative, or too large for int32_t. 3009 * 3010 * For characters without any numeric values in the Unicode Character Database, 3011 * this function will return U_NO_NUMERIC_VALUE. 3012 * Note: This is different from the Unicode Standard which specifies NaN as the default value. 3013 * (NaN is not available on all platforms.) 3014 * 3015 * Similar to java.lang.Character.getNumericValue(), but u_getNumericValue() 3016 * also supports negative values, large values, and fractions, 3017 * while Java's getNumericValue() returns values 10..35 for ASCII letters. 3018 * 3019 * @param c Code point to get the numeric value for. 3020 * @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined. 3021 * 3022 * @see U_NO_NUMERIC_VALUE 3023 * \xrefitem stable "Stable" "Stable List" ICU 2.2 3024 */ 3025 U_CAPI double U_EXPORT2 3026 u_getNumericValue(UChar32 c) __INTRODUCED_IN(31); 3027 3028 3029 3030 /** 3031 * Special value that is returned by u_getNumericValue when 3032 * no numeric value is defined for a code point. 3033 * 3034 * @see u_getNumericValue 3035 * \xrefitem stable "Stable" "Stable List" ICU 2.2 3036 */ 3037 #define U_NO_NUMERIC_VALUE ((double)-123456789.) 
3038 3039 /** 3040 * Determines whether the specified code point has the general category "Ll" 3041 * (lowercase letter). 3042 * 3043 * Same as java.lang.Character.isLowerCase(). 3044 * 3045 * This misses some characters that are also lowercase but 3046 * have a different general category value. 3047 * In order to include those, use UCHAR_LOWERCASE. 3048 * 3049 * In addition to being equivalent to a Java function, this also serves 3050 * as a C/POSIX migration function. 3051 * See the comments about C/POSIX character classification functions in the 3052 * documentation at the top of this header file. 3053 * 3054 * @param c the code point to be tested 3055 * @return true if the code point is an Ll lowercase letter 3056 * 3057 * @see UCHAR_LOWERCASE 3058 * @see u_isupper 3059 * @see u_istitle 3060 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3061 */ 3062 U_CAPI UBool U_EXPORT2 3063 u_islower(UChar32 c) __INTRODUCED_IN(31); 3064 3065 3066 3067 /** 3068 * Determines whether the specified code point has the general category "Lu" 3069 * (uppercase letter). 3070 * 3071 * Same as java.lang.Character.isUpperCase(). 3072 * 3073 * This misses some characters that are also uppercase but 3074 * have a different general category value. 3075 * In order to include those, use UCHAR_UPPERCASE. 3076 * 3077 * In addition to being equivalent to a Java function, this also serves 3078 * as a C/POSIX migration function. 3079 * See the comments about C/POSIX character classification functions in the 3080 * documentation at the top of this header file. 
3081 * 3082 * @param c the code point to be tested 3083 * @return true if the code point is an Lu uppercase letter 3084 * 3085 * @see UCHAR_UPPERCASE 3086 * @see u_islower 3087 * @see u_istitle 3088 * @see u_tolower 3089 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3090 */ 3091 U_CAPI UBool U_EXPORT2 3092 u_isupper(UChar32 c) __INTRODUCED_IN(31); 3093 3094 3095 3096 /** 3097 * Determines whether the specified code point is a titlecase letter. 3098 * True for general category "Lt" (titlecase letter). 3099 * 3100 * Same as java.lang.Character.isTitleCase(). 3101 * 3102 * @param c the code point to be tested 3103 * @return true if the code point is an Lt titlecase letter 3104 * 3105 * @see u_isupper 3106 * @see u_islower 3107 * @see u_totitle 3108 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3109 */ 3110 U_CAPI UBool U_EXPORT2 3111 u_istitle(UChar32 c) __INTRODUCED_IN(31); 3112 3113 3114 3115 /** 3116 * Determines whether the specified code point is a digit character according to Java. 3117 * True for characters with general category "Nd" (decimal digit numbers). 3118 * Beginning with Unicode 4, this is the same as 3119 * testing for the Numeric_Type of Decimal. 3120 * 3121 * Same as java.lang.Character.isDigit(). 3122 * 3123 * In addition to being equivalent to a Java function, this also serves 3124 * as a C/POSIX migration function. 3125 * See the comments about C/POSIX character classification functions in the 3126 * documentation at the top of this header file. 3127 * 3128 * @param c the code point to be tested 3129 * @return true if the code point is a digit character according to Character.isDigit() 3130 * 3131 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3132 */ 3133 U_CAPI UBool U_EXPORT2 3134 u_isdigit(UChar32 c) __INTRODUCED_IN(31); 3135 3136 3137 3138 /** 3139 * Determines whether the specified code point is a letter character. 3140 * True for general categories "L" (letters). 3141 * 3142 * Same as java.lang.Character.isLetter(). 
3143 * 3144 * In addition to being equivalent to a Java function, this also serves 3145 * as a C/POSIX migration function. 3146 * See the comments about C/POSIX character classification functions in the 3147 * documentation at the top of this header file. 3148 * 3149 * @param c the code point to be tested 3150 * @return true if the code point is a letter character 3151 * 3152 * @see u_isdigit 3153 * @see u_isalnum 3154 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3155 */ 3156 U_CAPI UBool U_EXPORT2 3157 u_isalpha(UChar32 c) __INTRODUCED_IN(31); 3158 3159 3160 3161 /** 3162 * Determines whether the specified code point is an alphanumeric character 3163 * (letter or digit) according to Java. 3164 * True for characters with general categories 3165 * "L" (letters) and "Nd" (decimal digit numbers). 3166 * 3167 * Same as java.lang.Character.isLetterOrDigit(). 3168 * 3169 * In addition to being equivalent to a Java function, this also serves 3170 * as a C/POSIX migration function. 3171 * See the comments about C/POSIX character classification functions in the 3172 * documentation at the top of this header file. 3173 * 3174 * @param c the code point to be tested 3175 * @return true if the code point is an alphanumeric character according to Character.isLetterOrDigit() 3176 * 3177 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3178 */ 3179 U_CAPI UBool U_EXPORT2 3180 u_isalnum(UChar32 c) __INTRODUCED_IN(31); 3181 3182 3183 3184 /** 3185 * Determines whether the specified code point is a hexadecimal digit. 3186 * This is equivalent to u_digit(c, 16)>=0. 3187 * True for characters with general category "Nd" (decimal digit numbers) 3188 * as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII. 3189 * (That is, for letters with code points 3190 * 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.) 3191 * 3192 * In order to narrow the definition of hexadecimal digits to only ASCII 3193 * characters, use (c<=0x7f && u_isxdigit(c)). 
3194 * 3195 * This is a C/POSIX migration function. 3196 * See the comments about C/POSIX character classification functions in the 3197 * documentation at the top of this header file. 3198 * 3199 * @param c the code point to be tested 3200 * @return true if the code point is a hexadecimal digit 3201 * 3202 * \xrefitem stable "Stable" "Stable List" ICU 2.6 3203 */ 3204 U_CAPI UBool U_EXPORT2 3205 u_isxdigit(UChar32 c) __INTRODUCED_IN(31); 3206 3207 3208 3209 /** 3210 * Determines whether the specified code point is a punctuation character. 3211 * True for characters with general categories "P" (punctuation). 3212 * 3213 * This is a C/POSIX migration function. 3214 * See the comments about C/POSIX character classification functions in the 3215 * documentation at the top of this header file. 3216 * 3217 * @param c the code point to be tested 3218 * @return true if the code point is a punctuation character 3219 * 3220 * \xrefitem stable "Stable" "Stable List" ICU 2.6 3221 */ 3222 U_CAPI UBool U_EXPORT2 3223 u_ispunct(UChar32 c) __INTRODUCED_IN(31); 3224 3225 3226 3227 /** 3228 * Determines whether the specified code point is a "graphic" character 3229 * (printable, excluding spaces). 3230 * true for all characters except those with general categories 3231 * "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates), 3232 * "Cn" (unassigned), and "Z" (separators). 3233 * 3234 * This is a C/POSIX migration function. 3235 * See the comments about C/POSIX character classification functions in the 3236 * documentation at the top of this header file. 
3237 * 3238 * @param c the code point to be tested 3239 * @return true if the code point is a "graphic" character 3240 * 3241 * \xrefitem stable "Stable" "Stable List" ICU 2.6 3242 */ 3243 U_CAPI UBool U_EXPORT2 3244 u_isgraph(UChar32 c) __INTRODUCED_IN(31); 3245 3246 3247 3248 /** 3249 * Determines whether the specified code point is a "blank" or "horizontal space", 3250 * a character that visibly separates words on a line. 3251 * The following are equivalent definitions: 3252 * 3253 * true for Unicode White_Space characters except for "vertical space controls" 3254 * where "vertical space controls" are the following characters: 3255 * U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS) 3256 * 3257 * same as 3258 * 3259 * true for U+0009 (TAB) and characters with general category "Zs" (space separators). 3260 * 3261 * Note: There are several ICU whitespace functions; please see the uchar.h 3262 * file documentation for a detailed comparison. 3263 * 3264 * This is a C/POSIX migration function. 3265 * See the comments about C/POSIX character classification functions in the 3266 * documentation at the top of this header file. 3267 * 3268 * @param c the code point to be tested 3269 * @return true if the code point is a "blank" 3270 * 3271 * \xrefitem stable "Stable" "Stable List" ICU 2.6 3272 */ 3273 U_CAPI UBool U_EXPORT2 3274 u_isblank(UChar32 c) __INTRODUCED_IN(31); 3275 3276 3277 3278 /** 3279 * Determines whether the specified code point is "defined", 3280 * which usually means that it is assigned a character. 3281 * True for general categories other than "Cn" (other, not assigned), 3282 * i.e., true for all code points mentioned in UnicodeData.txt. 3283 * 3284 * Note that non-character code points (e.g., U+FDD0) are not "defined" 3285 * (they are Cn), but surrogate code points are "defined" (Cs). 3286 * 3287 * Same as java.lang.Character.isDefined(). 
3288 * 3289 * @param c the code point to be tested 3290 * @return true if the code point is assigned a character 3291 * 3292 * @see u_isdigit 3293 * @see u_isalpha 3294 * @see u_isalnum 3295 * @see u_isupper 3296 * @see u_islower 3297 * @see u_istitle 3298 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3299 */ 3300 U_CAPI UBool U_EXPORT2 3301 u_isdefined(UChar32 c) __INTRODUCED_IN(31); 3302 3303 3304 3305 /** 3306 * Determines if the specified character is a space character or not. 3307 * 3308 * Note: There are several ICU whitespace functions; please see the uchar.h 3309 * file documentation for a detailed comparison. 3310 * 3311 * This is a C/POSIX migration function. 3312 * See the comments about C/POSIX character classification functions in the 3313 * documentation at the top of this header file. 3314 * 3315 * @param c the character to be tested 3316 * @return true if the character is a space character; false otherwise. 3317 * 3318 * @see u_isJavaSpaceChar 3319 * @see u_isWhitespace 3320 * @see u_isUWhiteSpace 3321 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3322 */ 3323 U_CAPI UBool U_EXPORT2 3324 u_isspace(UChar32 c) __INTRODUCED_IN(31); 3325 3326 3327 3328 /** 3329 * Determine if the specified code point is a space character according to Java. 3330 * True for characters with general categories "Z" (separators), 3331 * which does not include control codes (e.g., TAB or Line Feed). 3332 * 3333 * Same as java.lang.Character.isSpaceChar(). 3334 * 3335 * Note: There are several ICU whitespace functions; please see the uchar.h 3336 * file documentation for a detailed comparison. 
3337 * 3338 * @param c the code point to be tested 3339 * @return true if the code point is a space character according to Character.isSpaceChar() 3340 * 3341 * @see u_isspace 3342 * @see u_isWhitespace 3343 * @see u_isUWhiteSpace 3344 * \xrefitem stable "Stable" "Stable List" ICU 2.6 3345 */ 3346 U_CAPI UBool U_EXPORT2 3347 u_isJavaSpaceChar(UChar32 c) __INTRODUCED_IN(31); 3348 3349 3350 3351 /** 3352 * Determines if the specified code point is a whitespace character according to Java/ICU. 3353 * A character is considered to be a Java whitespace character if and only 3354 * if it satisfies one of the following criteria: 3355 * 3356 * - It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not 3357 * also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP). 3358 * - It is U+0009 HORIZONTAL TABULATION. 3359 * - It is U+000A LINE FEED. 3360 * - It is U+000B VERTICAL TABULATION. 3361 * - It is U+000C FORM FEED. 3362 * - It is U+000D CARRIAGE RETURN. 3363 * - It is U+001C FILE SEPARATOR. 3364 * - It is U+001D GROUP SEPARATOR. 3365 * - It is U+001E RECORD SEPARATOR. 3366 * - It is U+001F UNIT SEPARATOR. 3367 * 3368 * This API tries to sync with the semantics of Java's 3369 * java.lang.Character.isWhitespace(), but it may not return 3370 * the exact same results because of the Unicode version 3371 * difference. 3372 * 3373 * Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) 3374 * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. 3375 * See http://www.unicode.org/versions/Unicode4.0.1/ 3376 * 3377 * Note: There are several ICU whitespace functions; please see the uchar.h 3378 * file documentation for a detailed comparison. 
3379 * 3380 * @param c the code point to be tested 3381 * @return true if the code point is a whitespace character according to Java/ICU 3382 * 3383 * @see u_isspace 3384 * @see u_isJavaSpaceChar 3385 * @see u_isUWhiteSpace 3386 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3387 */ 3388 U_CAPI UBool U_EXPORT2 3389 u_isWhitespace(UChar32 c) __INTRODUCED_IN(31); 3390 3391 3392 3393 /** 3394 * Determines whether the specified code point is a control character 3395 * (as defined by this function). 3396 * A control character is one of the following: 3397 * - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f) 3398 * - U_CONTROL_CHAR (Cc) 3399 * - U_FORMAT_CHAR (Cf) 3400 * - U_LINE_SEPARATOR (Zl) 3401 * - U_PARAGRAPH_SEPARATOR (Zp) 3402 * 3403 * This is a C/POSIX migration function. 3404 * See the comments about C/POSIX character classification functions in the 3405 * documentation at the top of this header file. 3406 * 3407 * @param c the code point to be tested 3408 * @return true if the code point is a control character 3409 * 3410 * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT 3411 * @see u_isprint 3412 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3413 */ 3414 U_CAPI UBool U_EXPORT2 3415 u_iscntrl(UChar32 c) __INTRODUCED_IN(31); 3416 3417 3418 3419 /** 3420 * Determines whether the specified code point is an ISO control code. 3421 * True for U+0000..U+001f and U+007f..U+009f (general category "Cc"). 3422 * 3423 * Same as java.lang.Character.isISOControl(). 3424 * 3425 * @param c the code point to be tested 3426 * @return true if the code point is an ISO control code 3427 * 3428 * @see u_iscntrl 3429 * \xrefitem stable "Stable" "Stable List" ICU 2.6 3430 */ 3431 U_CAPI UBool U_EXPORT2 3432 u_isISOControl(UChar32 c) __INTRODUCED_IN(31); 3433 3434 3435 3436 /** 3437 * Determines whether the specified code point is a printable character. 3438 * True for general categories <em>other</em> than "C" (controls). 
3439 * 3440 * This is a C/POSIX migration function. 3441 * See the comments about C/POSIX character classification functions in the 3442 * documentation at the top of this header file. 3443 * 3444 * @param c the code point to be tested 3445 * @return true if the code point is a printable character 3446 * 3447 * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT 3448 * @see u_iscntrl 3449 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3450 */ 3451 U_CAPI UBool U_EXPORT2 3452 u_isprint(UChar32 c) __INTRODUCED_IN(31); 3453 3454 3455 3456 /** 3457 * Non-standard: Determines whether the specified code point is a base character. 3458 * True for general categories "L" (letters), "N" (numbers), 3459 * "Mc" (spacing combining marks), and "Me" (enclosing marks). 3460 * 3461 * Note that this is different from the Unicode Standard definition in 3462 * chapter 3.6, conformance clause D51 “Base character”, 3463 * which defines base characters as the code points with general categories 3464 * Letter (L), Number (N), Punctuation (P), Symbol (S), or Space Separator (Zs). 3465 * 3466 * @param c the code point to be tested 3467 * @return true if the code point is a base character according to this function 3468 * 3469 * @see u_isalpha 3470 * @see u_isdigit 3471 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3472 */ 3473 U_CAPI UBool U_EXPORT2 3474 u_isbase(UChar32 c) __INTRODUCED_IN(31); 3475 3476 3477 3478 /** 3479 * Returns the bidirectional category value for the code point, 3480 * which is used in the Unicode bidirectional algorithm 3481 * (UAX #9 http://www.unicode.org/reports/tr9/). 3482 * Note that some <em>unassigned</em> code points have bidi values 3483 * of R or AL because they are in blocks that are reserved 3484 * for Right-To-Left scripts. 
3485 * 3486 * Same as java.lang.Character.getDirectionality() 3487 * 3488 * @param c the code point to be tested 3489 * @return the bidirectional category (UCharDirection) value 3490 * 3491 * @see UCharDirection 3492 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3493 */ 3494 U_CAPI UCharDirection U_EXPORT2 3495 u_charDirection(UChar32 c) __INTRODUCED_IN(31); 3496 3497 3498 3499 /** 3500 * Determines whether the code point has the Bidi_Mirrored property. 3501 * This property is set for characters that are commonly used in 3502 * Right-To-Left contexts and need to be displayed with a "mirrored" 3503 * glyph. 3504 * 3505 * Same as java.lang.Character.isMirrored(). 3506 * Same as UCHAR_BIDI_MIRRORED 3507 * 3508 * @param c the code point to be tested 3509 * @return true if the character has the Bidi_Mirrored property 3510 * 3511 * @see UCHAR_BIDI_MIRRORED 3512 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3513 */ 3514 U_CAPI UBool U_EXPORT2 3515 u_isMirrored(UChar32 c) __INTRODUCED_IN(31); 3516 3517 3518 3519 /** 3520 * Maps the specified character to a "mirror-image" character. 3521 * For characters with the Bidi_Mirrored property, implementations 3522 * sometimes need a "poor man's" mapping to another Unicode 3523 * character (code point) such that the default glyph may serve 3524 * as the mirror-image of the default glyph of the specified 3525 * character. This is useful for text conversion to and from 3526 * codepages with visual order, and for displays without glyph 3527 * selection capabilities. 
3528 * 3529 * @param c the code point to be mapped 3530 * @return another Unicode code point that may serve as a mirror-image 3531 * substitute, or c itself if there is no such mapping or c 3532 * does not have the Bidi_Mirrored property 3533 * 3534 * @see UCHAR_BIDI_MIRRORED 3535 * @see u_isMirrored 3536 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3537 */ 3538 U_CAPI UChar32 U_EXPORT2 3539 u_charMirror(UChar32 c) __INTRODUCED_IN(31); 3540 3541 3542 3543 /** 3544 * Maps the specified character to its paired bracket character. 3545 * For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror(). 3546 * Otherwise c itself is returned. 3547 * See http://www.unicode.org/reports/tr9/ 3548 * 3549 * @param c the code point to be mapped 3550 * @return the paired bracket code point, 3551 * or c itself if there is no such mapping 3552 * (Bidi_Paired_Bracket_Type=None) 3553 * 3554 * @see UCHAR_BIDI_PAIRED_BRACKET 3555 * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE 3556 * @see u_charMirror 3557 * \xrefitem stable "Stable" "Stable List" ICU 52 3558 */ 3559 U_CAPI UChar32 U_EXPORT2 3560 u_getBidiPairedBracket(UChar32 c) __INTRODUCED_IN(31); 3561 3562 3563 3564 /** 3565 * Returns the general category value for the code point. 3566 * 3567 * Same as java.lang.Character.getType(). 3568 * 3569 * @param c the code point to be tested 3570 * @return the general category (UCharCategory) value 3571 * 3572 * @see UCharCategory 3573 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3574 */ 3575 U_CAPI int8_t U_EXPORT2 3576 u_charType(UChar32 c) __INTRODUCED_IN(31); 3577 3578 3579 3580 /** 3581 * Get a single-bit bit set for the general category of a character. 3582 * This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc. 3583 * Same as U_MASK(u_charType(c)). 
3584 * 3585 * @param c the code point to be tested 3586 * @return a single-bit mask corresponding to the general category (UCharCategory) value 3587 * 3588 * @see u_charType 3589 * @see UCharCategory 3590 * @see U_GC_CN_MASK 3591 * \xrefitem stable "Stable" "Stable List" ICU 2.1 3592 */ 3593 #define U_GET_GC_MASK(c) U_MASK(u_charType(c)) 3594 3595 /** 3596 * Callback from u_enumCharTypes(), is called for each contiguous range 3597 * of code points c (where start<=c<limit) 3598 * with the same Unicode general category ("character type"). 3599 * 3600 * The callback function can stop the enumeration by returning false. 3601 * 3602 * @param context an opaque pointer, as passed into u_enumCharTypes() 3603 * @param start the first code point in a contiguous range with value 3604 * @param limit one past the last code point in a contiguous range with value 3605 * @param type the general category for all code points in [start..limit[ 3606 * @return false to stop the enumeration 3607 * 3608 * \xrefitem stable "Stable" "Stable List" ICU 2.1 3609 * @see UCharCategory 3610 * @see u_enumCharTypes 3611 */ 3612 typedef UBool U_CALLCONV 3613 UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type); 3614 3615 /** 3616 * Enumerate efficiently all code points with their Unicode general categories. 3617 * 3618 * This is useful for building data structures (e.g., UnicodeSet's), 3619 * for enumerating all assigned code points (type!=U_UNASSIGNED), etc. 3620 * 3621 * For each contiguous range of code points with a given general category ("character type"), 3622 * the UCharEnumTypeRange function is called. 3623 * Adjacent ranges have different types. 3624 * The Unicode Standard guarantees that the numeric value of the type is 0..31. 
3625 * 3626 * @param enumRange a pointer to a function that is called for each contiguous range 3627 * of code points with the same general category 3628 * @param context an opaque pointer that is passed on to the callback function 3629 * 3630 * \xrefitem stable "Stable" "Stable List" ICU 2.1 3631 * @see UCharCategory 3632 * @see UCharEnumTypeRange 3633 */ 3634 U_CAPI void U_EXPORT2 3635 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) __INTRODUCED_IN(31); 3636 3637 3638 3639 #if !UCONFIG_NO_NORMALIZATION 3640 3641 /** 3642 * Returns the combining class of the code point as specified in UnicodeData.txt. 3643 * 3644 * @param c the code point of the character 3645 * @return the combining class of the character 3646 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3647 */ 3648 U_CAPI uint8_t U_EXPORT2 3649 u_getCombiningClass(UChar32 c) __INTRODUCED_IN(31); 3650 3651 3652 3653 #endif 3654 3655 /** 3656 * Returns the decimal digit value of a decimal digit character. 3657 * Such characters have the general category "Nd" (decimal digit numbers) 3658 * and a Numeric_Type of Decimal. 3659 * 3660 * Unlike ICU releases before 2.6, no digit values are returned for any 3661 * Han characters because Han number characters are often used with a special 3662 * Chinese-style number format (with characters for powers of 10 in between) 3663 * instead of in decimal-positional notation. 3664 * Unicode 4 explicitly assigns Han number characters the Numeric_Type 3665 * Numeric instead of Decimal. 3666 * See Jitterbug 1483 for more details. 3667 * 3668 * Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue() 3669 * for complete numeric Unicode properties. 
3670 * 3671 * @param c the code point for which to get the decimal digit value 3672 * @return the decimal digit value of c, 3673 * or -1 if c is not a decimal digit character 3674 * 3675 * @see u_getNumericValue 3676 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3677 */ 3678 U_CAPI int32_t U_EXPORT2 3679 u_charDigitValue(UChar32 c) __INTRODUCED_IN(31); 3680 3681 3682 3683 3684 3685 /** 3686 * Retrieve the name of a Unicode character. 3687 * Depending on <code>nameChoice</code>, the character name written 3688 * into the buffer is the "modern" name or the name that was defined 3689 * in Unicode version 1.0. 3690 * The name contains only "invariant" characters 3691 * like A-Z, 0-9, space, and '-'. 3692 * Unicode 1.0 names are only retrieved if they are different from the modern 3693 * names and if the data file contains the data for them. gennames may or may 3694 * not be called with a command line option to include 1.0 names in unames.dat. 3695 * 3696 * @param code The character (code point) for which to get the name. 3697 * It must be <code>0<=code<=0x10ffff</code>. 3698 * @param nameChoice Selector for which name to get. 3699 * @param buffer Destination address for copying the name. 3700 * The name will always be zero-terminated. 3701 * If there is no name, then the buffer will be set to the empty string. 3702 * @param bufferLength <code>==sizeof(buffer)</code> 3703 * @param pErrorCode Pointer to a UErrorCode variable; 3704 * check for <code>U_SUCCESS()</code> after <code>u_charName()</code> 3705 * returns. 3706 * @return The length of the name, or 0 if there is no name for this character. 3707 * If the bufferLength is less than or equal to the length, then the buffer 3708 * contains the truncated name and the returned length indicates the full 3709 * length of the name. 3710 * The length does not include the zero-termination. 
3711 * 3712 * @see UCharNameChoice 3713 * @see u_charFromName 3714 * @see u_enumCharNames 3715 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3716 */ 3717 U_CAPI int32_t U_EXPORT2 3718 u_charName(UChar32 code, UCharNameChoice nameChoice, 3719 char *buffer, int32_t bufferLength, 3720 UErrorCode *pErrorCode) __INTRODUCED_IN(31); 3721 3722 3723 3724 #ifndef U_HIDE_DEPRECATED_API 3725 3726 #endif /* U_HIDE_DEPRECATED_API */ 3727 3728 /** 3729 * Find a Unicode character by its name and return its code point value. 3730 * The name is matched exactly and completely. 3731 * If the name does not correspond to a code point, <i>pErrorCode</i> 3732 * is set to <code>U_INVALID_CHAR_FOUND</code>. 3733 * A Unicode 1.0 name is matched only if it differs from the modern name. 3734 * Unicode names are all uppercase. Extended names are lowercase followed 3735 * by an uppercase hexadecimal number, and within angle brackets. 3736 * 3737 * @param nameChoice Selector for which name to match. 3738 * @param name The name to match. 3739 * @param pErrorCode Pointer to a UErrorCode variable 3740 * @return The Unicode value of the code point with the given name, 3741 * or an undefined value if there is no such code point. 3742 * 3743 * @see UCharNameChoice 3744 * @see u_charName 3745 * @see u_enumCharNames 3746 * \xrefitem stable "Stable" "Stable List" ICU 1.7 3747 */ 3748 U_CAPI UChar32 U_EXPORT2 3749 u_charFromName(UCharNameChoice nameChoice, 3750 const char *name, 3751 UErrorCode *pErrorCode) __INTRODUCED_IN(31); 3752 3753 3754 3755 /** 3756 * Type of a callback function for u_enumCharNames() that gets called 3757 * for each Unicode character with the code point value and 3758 * the character name. 3759 * If such a function returns false, then the enumeration is stopped. 3760 * 3761 * @param context The context pointer that was passed to u_enumCharNames(). 3762 * @param code The Unicode code point for the character with this name. 
3763 * @param nameChoice Selector for which kind of names is enumerated. 3764 * @param name The character's name, zero-terminated. 3765 * @param length The length of the name. 3766 * @return true if the enumeration should continue, false to stop it. 3767 * 3768 * @see UCharNameChoice 3769 * @see u_enumCharNames 3770 * \xrefitem stable "Stable" "Stable List" ICU 1.7 3771 */ 3772 typedef UBool U_CALLCONV UEnumCharNamesFn(void *context, 3773 UChar32 code, 3774 UCharNameChoice nameChoice, 3775 const char *name, 3776 int32_t length); 3777 3778 /** 3779 * Enumerate all assigned Unicode characters between the start and limit 3780 * code points (start inclusive, limit exclusive) and call a function 3781 * for each, passing the code point value and the character name. 3782 * For Unicode 1.0 names, only those are enumerated that differ from the 3783 * modern names. 3784 * 3785 * @param start The first code point in the enumeration range. 3786 * @param limit One more than the last code point in the enumeration range 3787 * (the first one after the range). 3788 * @param fn The function that is to be called for each character name. 3789 * @param context An arbitrary pointer that is passed to the function. 3790 * @param nameChoice Selector for which kind of names to enumerate. 3791 * @param pErrorCode Pointer to a UErrorCode variable 3792 * 3793 * @see UCharNameChoice 3794 * @see UEnumCharNamesFn 3795 * @see u_charName 3796 * @see u_charFromName 3797 * \xrefitem stable "Stable" "Stable List" ICU 1.7 3798 */ 3799 U_CAPI void U_EXPORT2 3800 u_enumCharNames(UChar32 start, UChar32 limit, 3801 UEnumCharNamesFn *fn, 3802 void *context, 3803 UCharNameChoice nameChoice, 3804 UErrorCode *pErrorCode) __INTRODUCED_IN(31); 3805 3806 3807 3808 /** 3809 * Return the Unicode name for a given property, as given in the 3810 * Unicode database file PropertyAliases.txt. 
3811 * 3812 * In addition, this function maps the property 3813 * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" / 3814 * "General_Category_Mask". These names are not in 3815 * PropertyAliases.txt. 3816 * 3817 * @param property UProperty selector other than UCHAR_INVALID_CODE. 3818 * If out of range, NULL is returned. 3819 * 3820 * @param nameChoice selector for which name to get. If out of range, 3821 * NULL is returned. All properties have a long name. Most 3822 * have a short name, but some do not. Unicode allows for 3823 * additional names; if present these will be returned by 3824 * U_LONG_PROPERTY_NAME + i, where i=1, 2,... 3825 * 3826 * @return a pointer to the name, or NULL if either the 3827 * property or the nameChoice is out of range. If a given 3828 * nameChoice returns NULL, then all larger values of 3829 * nameChoice will return NULL, with one exception: if NULL is 3830 * returned for U_SHORT_PROPERTY_NAME, then 3831 * U_LONG_PROPERTY_NAME (and higher) may still return a 3832 * non-NULL value. The returned pointer is valid until 3833 * u_cleanup() is called. 3834 * 3835 * @see UProperty 3836 * @see UPropertyNameChoice 3837 * \xrefitem stable "Stable" "Stable List" ICU 2.4 3838 */ 3839 U_CAPI const char* U_EXPORT2 3840 u_getPropertyName(UProperty property, 3841 UPropertyNameChoice nameChoice) __INTRODUCED_IN(31); 3842 3843 3844 3845 /** 3846 * Return the UProperty enum for a given property name, as specified 3847 * in the Unicode database file PropertyAliases.txt. Short, long, and 3848 * any other variants are recognized. 3849 * 3850 * In addition, this function maps the synthetic names "gcm" / 3851 * "General_Category_Mask" to the property 3852 * UCHAR_GENERAL_CATEGORY_MASK. These names are not in 3853 * PropertyAliases.txt. 3854 * 3855 * @param alias the property name to be matched. The name is compared 3856 * using "loose matching" as described in PropertyAliases.txt. 
3857 * 3858 * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name 3859 * does not match any property. 3860 * 3861 * @see UProperty 3862 * \xrefitem stable "Stable" "Stable List" ICU 2.4 3863 */ 3864 U_CAPI UProperty U_EXPORT2 3865 u_getPropertyEnum(const char* alias) __INTRODUCED_IN(31); 3866 3867 3868 3869 /** 3870 * Return the Unicode name for a given property value, as given in the 3871 * Unicode database file PropertyValueAliases.txt. 3872 * 3873 * Note: Some of the names in PropertyValueAliases.txt can only be 3874 * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not 3875 * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / 3876 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 3877 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 3878 * 3879 * @param property UProperty selector constant. 3880 * Must be UCHAR_BINARY_START<=property<UCHAR_BINARY_LIMIT 3881 * or UCHAR_INT_START<=property<UCHAR_INT_LIMIT 3882 * or UCHAR_MASK_START<=property<UCHAR_MASK_LIMIT. 3883 * If out of range, NULL is returned. 3884 * 3885 * @param value selector for a value for the given property. If out 3886 * of range, NULL is returned. In general, valid values range 3887 * from 0 up to some maximum. There are a few exceptions: 3888 * (1.) UCHAR_BLOCK values begin at the non-zero value 3889 * UBLOCK_BASIC_LATIN. (2.) UCHAR_CANONICAL_COMBINING_CLASS 3890 * values are not contiguous and range from 0..240. (3.) 3891 * UCHAR_GENERAL_CATEGORY_MASK values are not values of 3892 * UCharCategory, but rather mask values produced by 3893 * U_GET_GC_MASK(). This allows grouped categories such as 3894 * [:L:] to be represented. Mask values range 3895 * non-contiguously from 1..U_GC_P_MASK. 3896 * 3897 * @param nameChoice selector for which name to get. If out of range, 3898 * NULL is returned. All values have a long name. Most have 3899 * a short name, but some do not.
Unicode allows for 3900 * additional names; if present these will be returned by 3901 * U_LONG_PROPERTY_NAME + i, where i=1, 2,... 3902 * 3903 * @return a pointer to the name, or NULL if the property, 3904 * the value, or the nameChoice is out of range. If a given 3905 * nameChoice returns NULL, then all larger values of 3906 * nameChoice will return NULL, with one exception: if NULL is 3907 * returned for U_SHORT_PROPERTY_NAME, then 3908 * U_LONG_PROPERTY_NAME (and higher) may still return a 3909 * non-NULL value. The returned pointer is valid until 3910 * u_cleanup() is called. 3911 * 3912 * @see UProperty 3913 * @see UPropertyNameChoice 3914 * \xrefitem stable "Stable" "Stable List" ICU 2.4 3915 */ 3916 U_CAPI const char* U_EXPORT2 3917 u_getPropertyValueName(UProperty property, 3918 int32_t value, 3919 UPropertyNameChoice nameChoice) __INTRODUCED_IN(31); 3920 3921 3922 3923 /** 3924 * Return the property value integer for a given value name, as 3925 * specified in the Unicode database file PropertyValueAliases.txt. 3926 * Short, long, and any other variants are recognized. 3927 * 3928 * Note: Some of the names in PropertyValueAliases.txt will only be 3929 * recognized with UCHAR_GENERAL_CATEGORY_MASK, not 3930 * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / 3931 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 3932 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 3933 * 3934 * @param property UProperty selector constant. 3935 * Must be UCHAR_BINARY_START<=property<UCHAR_BINARY_LIMIT 3936 * or UCHAR_INT_START<=property<UCHAR_INT_LIMIT 3937 * or UCHAR_MASK_START<=property<UCHAR_MASK_LIMIT. 3938 * If out of range, UCHAR_INVALID_CODE is returned. 3939 * 3940 * @param alias the value name to be matched. The name is compared 3941 * using "loose matching" as described in 3942 * PropertyValueAliases.txt.
3943 * 3944 * @return a value integer or UCHAR_INVALID_CODE if the given name 3945 * does not match any value of the given property, or if the 3946 * property is invalid. Note: UCHAR_GENERAL_CATEGORY_MASK values 3947 * are not values of UCharCategory, but rather mask values 3948 * produced by U_GET_GC_MASK(). This allows grouped 3949 * categories such as [:L:] to be represented. 3950 * 3951 * @see UProperty 3952 * \xrefitem stable "Stable" "Stable List" ICU 2.4 3953 */ 3954 U_CAPI int32_t U_EXPORT2 3955 u_getPropertyValueEnum(UProperty property, 3956 const char* alias) __INTRODUCED_IN(31); 3957 3958 3959 3960 /** 3961 * Determines if the specified character is permissible as the first character in an identifier 3962 * according to UAX #31 Unicode Identifier and Pattern Syntax. 3963 * 3964 * Same as Unicode ID_Start (UCHAR_ID_START). 3965 * 3966 * @param c the code point to be tested 3967 * @return true if the code point may start an identifier 3968 * 3969 * @see UCHAR_ID_START 3970 * @see u_isalpha 3971 * @see u_isIDPart 3972 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3973 */ 3974 U_CAPI UBool U_EXPORT2 3975 u_isIDStart(UChar32 c) __INTRODUCED_IN(31); 3976 3977 3978 3979 /** 3980 * Determines if the specified character is permissible as a non-initial character of an identifier 3981 * according to UAX #31 Unicode Identifier and Pattern Syntax. 3982 * 3983 * Same as Unicode ID_Continue (UCHAR_ID_CONTINUE). 
3984 * 3985 * @param c the code point to be tested 3986 * @return true if the code point may occur as a non-initial character of an identifier 3987 * 3988 * @see UCHAR_ID_CONTINUE 3989 * @see u_isIDStart 3990 * @see u_isIDIgnorable 3991 * \xrefitem stable "Stable" "Stable List" ICU 2.0 3992 */ 3993 U_CAPI UBool U_EXPORT2 3994 u_isIDPart(UChar32 c) __INTRODUCED_IN(31); 3995 3996 3997 3998 #ifndef U_HIDE_DRAFT_API 3999 4000 4001 4002 #endif // U_HIDE_DRAFT_API 4003 4004 /** 4005 * Determines if the specified character should be regarded 4006 * as an ignorable character in an identifier, 4007 * according to Java. 4008 * True for characters with general category "Cf" (format controls) as well as 4009 * non-whitespace ISO controls 4010 * (U+0000..U+0008, U+000E..U+001B, U+007F..U+009F). 4011 * 4012 * Same as java.lang.Character.isIdentifierIgnorable(). 4013 * 4014 * Note that Unicode just recommends to ignore Cf (format controls). 4015 * 4016 * @param c the code point to be tested 4017 * @return true if the code point is ignorable in identifiers according to Java 4018 * 4019 * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT 4020 * @see u_isIDStart 4021 * @see u_isIDPart 4022 * \xrefitem stable "Stable" "Stable List" ICU 2.0 4023 */ 4024 U_CAPI UBool U_EXPORT2 4025 u_isIDIgnorable(UChar32 c) __INTRODUCED_IN(31); 4026 4027 4028 4029 /** 4030 * Determines if the specified character is permissible as the 4031 * first character in a Java identifier. 4032 * In addition to u_isIDStart(c), true for characters with 4033 * general categories "Sc" (currency symbols) and "Pc" (connecting punctuation). 4034 * 4035 * Same as java.lang.Character.isJavaIdentifierStart(). 
4036 * 4037 * @param c the code point to be tested 4038 * @return true if the code point may start a Java identifier 4039 * 4040 * @see u_isJavaIDPart 4041 * @see u_isalpha 4042 * @see u_isIDStart 4043 * \xrefitem stable "Stable" "Stable List" ICU 2.0 4044 */ 4045 U_CAPI UBool U_EXPORT2 4046 u_isJavaIDStart(UChar32 c) __INTRODUCED_IN(31); 4047 4048 4049 4050 /** 4051 * Determines if the specified character is permissible 4052 * in a Java identifier. 4053 * In addition to u_isIDPart(c), true for characters with 4054 * general category "Sc" (currency symbols). 4055 * 4056 * Same as java.lang.Character.isJavaIdentifierPart(). 4057 * 4058 * @param c the code point to be tested 4059 * @return true if the code point may occur in a Java identifier 4060 * 4061 * @see u_isIDIgnorable 4062 * @see u_isJavaIDStart 4063 * @see u_isalpha 4064 * @see u_isdigit 4065 * @see u_isIDPart 4066 * \xrefitem stable "Stable" "Stable List" ICU 2.0 4067 */ 4068 U_CAPI UBool U_EXPORT2 4069 u_isJavaIDPart(UChar32 c) __INTRODUCED_IN(31); 4070 4071 4072 4073 /** 4074 * The given character is mapped to its lowercase equivalent according to 4075 * UnicodeData.txt; if the character has no lowercase equivalent, the character 4076 * itself is returned. 4077 * 4078 * Same as java.lang.Character.toLowerCase(). 4079 * 4080 * This function only returns the simple, single-code point case mapping. 4081 * Full case mappings should be used whenever possible because they produce 4082 * better results by working on whole strings. 4083 * They take into account the string context and the language and can map 4084 * to a result string with a different length as appropriate. 4085 * Full case mappings are applied by the string case mapping functions, 4086 * see ustring.h and the UnicodeString class. 
4087 * See also the User Guide chapter on C/POSIX migration: 4088 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 4089 * 4090 * @param c the code point to be mapped 4091 * @return the Simple_Lowercase_Mapping of the code point, if any; 4092 * otherwise the code point itself. 4093 * \xrefitem stable "Stable" "Stable List" ICU 2.0 4094 */ 4095 U_CAPI UChar32 U_EXPORT2 4096 u_tolower(UChar32 c) __INTRODUCED_IN(31); 4097 4098 4099 4100 /** 4101 * The given character is mapped to its uppercase equivalent according to UnicodeData.txt; 4102 * if the character has no uppercase equivalent, the character itself is 4103 * returned. 4104 * 4105 * Same as java.lang.Character.toUpperCase(). 4106 * 4107 * This function only returns the simple, single-code point case mapping. 4108 * Full case mappings should be used whenever possible because they produce 4109 * better results by working on whole strings. 4110 * They take into account the string context and the language and can map 4111 * to a result string with a different length as appropriate. 4112 * Full case mappings are applied by the string case mapping functions, 4113 * see ustring.h and the UnicodeString class. 4114 * See also the User Guide chapter on C/POSIX migration: 4115 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 4116 * 4117 * @param c the code point to be mapped 4118 * @return the Simple_Uppercase_Mapping of the code point, if any; 4119 * otherwise the code point itself. 4120 * \xrefitem stable "Stable" "Stable List" ICU 2.0 4121 */ 4122 U_CAPI UChar32 U_EXPORT2 4123 u_toupper(UChar32 c) __INTRODUCED_IN(31); 4124 4125 4126 4127 /** 4128 * The given character is mapped to its titlecase equivalent 4129 * according to UnicodeData.txt; 4130 * if none is defined, the character itself is returned. 4131 * 4132 * Same as java.lang.Character.toTitleCase(). 4133 * 4134 * This function only returns the simple, single-code point case mapping. 
4135 * Full case mappings should be used whenever possible because they produce 4136 * better results by working on whole strings. 4137 * They take into account the string context and the language and can map 4138 * to a result string with a different length as appropriate. 4139 * Full case mappings are applied by the string case mapping functions, 4140 * see ustring.h and the UnicodeString class. 4141 * See also the User Guide chapter on C/POSIX migration: 4142 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 4143 * 4144 * @param c the code point to be mapped 4145 * @return the Simple_Titlecase_Mapping of the code point, if any; 4146 * otherwise the code point itself. 4147 * \xrefitem stable "Stable" "Stable List" ICU 2.0 4148 */ 4149 U_CAPI UChar32 U_EXPORT2 4150 u_totitle(UChar32 c) __INTRODUCED_IN(31); 4151 4152 4153 4154 /** 4155 * The given character is mapped to its case folding equivalent according to 4156 * UnicodeData.txt and CaseFolding.txt; 4157 * if the character has no case folding equivalent, the character 4158 * itself is returned. 4159 * 4160 * This function only returns the simple, single-code point case mapping. 4161 * Full case mappings should be used whenever possible because they produce 4162 * better results by working on whole strings. 4163 * They take into account the string context and the language and can map 4164 * to a result string with a different length as appropriate. 4165 * Full case mappings are applied by the string case mapping functions, 4166 * see ustring.h and the UnicodeString class. 4167 * See also the User Guide chapter on C/POSIX migration: 4168 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 4169 * 4170 * @param c the code point to be mapped 4171 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I 4172 * @return the Simple_Case_Folding of the code point, if any; 4173 * otherwise the code point itself. 
4174 * \xrefitem stable "Stable" "Stable List" ICU 2.0 4175 */ 4176 U_CAPI UChar32 U_EXPORT2 4177 u_foldCase(UChar32 c, uint32_t options) __INTRODUCED_IN(31); 4178 4179 4180 4181 /** 4182 * Returns the decimal digit value of the code point in the 4183 * specified radix. 4184 * 4185 * If the radix is not in the range <code>2<=radix<=36</code> or if the 4186 * value of <code>ch</code> is not a valid digit in the specified 4187 * radix, <code>-1</code> is returned. A character is a valid digit 4188 * if at least one of the following is true: 4189 * <ul> 4190 * <li>The character has a decimal digit value. 4191 * Such characters have the general category "Nd" (decimal digit numbers) 4192 * and a Numeric_Type of Decimal. 4193 * In this case the value is the character's decimal digit value.</li> 4194 * <li>The character is one of the uppercase Latin letters 4195 * <code>'A'</code> through <code>'Z'</code>. 4196 * In this case the value is <code>ch-'A'+10</code>.</li> 4197 * <li>The character is one of the lowercase Latin letters 4198 * <code>'a'</code> through <code>'z'</code>. 4199 * In this case the value is <code>ch-'a'+10</code>.</li> 4200 * <li>Latin letters from both the ASCII range (0061..007A, 0041..005A) 4201 * as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A) 4202 * are recognized.</li> 4203 * </ul> 4204 * 4205 * Same as java.lang.Character.digit(). 4206 * 4207 * @param ch the code point to be tested. 4208 * @param radix the radix. 4209 * @return the numeric value represented by the character in the 4210 * specified radix, 4211 * or -1 if there is no value or if the value is not less than the radix.
4212 * 4213 * @see UCHAR_NUMERIC_TYPE 4214 * @see u_forDigit 4215 * @see u_charDigitValue 4216 * @see u_isdigit 4217 * \xrefitem stable "Stable" "Stable List" ICU 2.0 4218 */ 4219 U_CAPI int32_t U_EXPORT2 4220 u_digit(UChar32 ch, int8_t radix) __INTRODUCED_IN(31); 4221 4222 4223 4224 /** 4225 * Determines the character representation for a specific digit in 4226 * the specified radix. If the value of <code>radix</code> is not a 4227 * valid radix, or the value of <code>digit</code> is not a valid 4228 * digit in the specified radix, the null character 4229 * (<code>U+0000</code>) is returned. 4230 * <p> 4231 * The <code>radix</code> argument is valid if it is greater than or 4232 * equal to 2 and less than or equal to 36. 4233 * The <code>digit</code> argument is valid if 4234 * <code>0 <= digit < radix</code>. 4235 * <p> 4236 * If the digit is less than 10, then 4237 * <code>'0' + digit</code> is returned. Otherwise, the value 4238 * <code>'a' + digit - 10</code> is returned. 4239 * 4240 * Same as java.lang.Character.forDigit(). 4241 * 4242 * @param digit the number to convert to a character. 4243 * @param radix the radix. 4244 * @return the <code>char</code> representation of the specified digit 4245 * in the specified radix. 4246 * 4247 * @see u_digit 4248 * @see u_charDigitValue 4249 * @see u_isdigit 4250 * \xrefitem stable "Stable" "Stable List" ICU 2.0 4251 */ 4252 U_CAPI UChar32 U_EXPORT2 4253 u_forDigit(int32_t digit, int8_t radix) __INTRODUCED_IN(31); 4254 4255 4256 4257 /** 4258 * Get the "age" of the code point. 4259 * The "age" is the Unicode version when the code point was first 4260 * designated (as a non-character or for Private Use) 4261 * or assigned a character. 4262 * This can be useful to avoid emitting code points to receiving 4263 * processes that do not accept newer characters. 4264 * The data is from the UCD file DerivedAge.txt. 4265 * 4266 * @param c The code point. 
4267 * @param versionArray The Unicode version number array, to be filled in. 4268 * 4269 * \xrefitem stable "Stable" "Stable List" ICU 2.1 4270 */ 4271 U_CAPI void U_EXPORT2 4272 u_charAge(UChar32 c, UVersionInfo versionArray) __INTRODUCED_IN(31); 4273 4274 4275 4276 /** 4277 * Gets the Unicode version information. 4278 * The version array is filled in with the version information 4279 * for the Unicode standard that is currently used by ICU. 4280 * For example, Unicode version 3.1.1 is represented as an array with 4281 * the values { 3, 1, 1, 0 }. 4282 * 4283 * @param versionArray an output array that will be filled in with 4284 * the Unicode version number 4285 * \xrefitem stable "Stable" "Stable List" ICU 2.0 4286 */ 4287 U_CAPI void U_EXPORT2 4288 u_getUnicodeVersion(UVersionInfo versionArray) __INTRODUCED_IN(31); 4289 4290 4291 4292 #if !UCONFIG_NO_NORMALIZATION 4293 4294 4295 #endif 4296 4297 4298 U_CDECL_END 4299 4300 #endif /*_UCHAR*/ 4301 /*eof*/ 4302 4303 /** @} */ // addtogroup 4304