1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker /* 4*0e209d39SAndroid Build Coastguard Worker ****************************************************************************** 5*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 1996-2015, International Business Machines Corporation and others. 6*0e209d39SAndroid Build Coastguard Worker * All Rights Reserved. 7*0e209d39SAndroid Build Coastguard Worker ****************************************************************************** 8*0e209d39SAndroid Build Coastguard Worker */ 9*0e209d39SAndroid Build Coastguard Worker 10*0e209d39SAndroid Build Coastguard Worker #ifndef UBRK_H 11*0e209d39SAndroid Build Coastguard Worker #define UBRK_H 12*0e209d39SAndroid Build Coastguard Worker 13*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 14*0e209d39SAndroid Build Coastguard Worker #include "unicode/uloc.h" 15*0e209d39SAndroid Build Coastguard Worker #include "unicode/utext.h" 16*0e209d39SAndroid Build Coastguard Worker 17*0e209d39SAndroid Build Coastguard Worker #if U_SHOW_CPLUSPLUS_API 18*0e209d39SAndroid Build Coastguard Worker #include "unicode/localpointer.h" 19*0e209d39SAndroid Build Coastguard Worker #endif // U_SHOW_CPLUSPLUS_API 20*0e209d39SAndroid Build Coastguard Worker 21*0e209d39SAndroid Build Coastguard Worker /** 22*0e209d39SAndroid Build Coastguard Worker * A text-break iterator. 23*0e209d39SAndroid Build Coastguard Worker * For usage in C programs. 
24*0e209d39SAndroid Build Coastguard Worker */ 25*0e209d39SAndroid Build Coastguard Worker #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR 26*0e209d39SAndroid Build Coastguard Worker # define UBRK_TYPEDEF_UBREAK_ITERATOR 27*0e209d39SAndroid Build Coastguard Worker /** 28*0e209d39SAndroid Build Coastguard Worker * Opaque type representing an ICU Break iterator object. 29*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 30*0e209d39SAndroid Build Coastguard Worker */ 31*0e209d39SAndroid Build Coastguard Worker typedef struct UBreakIterator UBreakIterator; 32*0e209d39SAndroid Build Coastguard Worker #endif 33*0e209d39SAndroid Build Coastguard Worker 34*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_BREAK_ITERATION 35*0e209d39SAndroid Build Coastguard Worker 36*0e209d39SAndroid Build Coastguard Worker #include "unicode/parseerr.h" 37*0e209d39SAndroid Build Coastguard Worker 38*0e209d39SAndroid Build Coastguard Worker /** 39*0e209d39SAndroid Build Coastguard Worker * \file 40*0e209d39SAndroid Build Coastguard Worker * \brief C API: BreakIterator 41*0e209d39SAndroid Build Coastguard Worker * 42*0e209d39SAndroid Build Coastguard Worker * <h2> BreakIterator C API </h2> 43*0e209d39SAndroid Build Coastguard Worker * 44*0e209d39SAndroid Build Coastguard Worker * The BreakIterator C API defines methods for finding the location 45*0e209d39SAndroid Build Coastguard Worker * of boundaries in text. Pointer to a UBreakIterator maintain a 46*0e209d39SAndroid Build Coastguard Worker * current position and scan over text returning the index of characters 47*0e209d39SAndroid Build Coastguard Worker * where boundaries occur. 48*0e209d39SAndroid Build Coastguard Worker * <p> 49*0e209d39SAndroid Build Coastguard Worker * Line boundary analysis determines where a text string can be broken 50*0e209d39SAndroid Build Coastguard Worker * when line-wrapping. The mechanism correctly handles punctuation and 51*0e209d39SAndroid Build Coastguard Worker * hyphenated words. 
52*0e209d39SAndroid Build Coastguard Worker * <p> 53*0e209d39SAndroid Build Coastguard Worker * Note: The locale keyword "lb" can be used to modify line break 54*0e209d39SAndroid Build Coastguard Worker * behavior according to the CSS level 3 line-break options, see 55*0e209d39SAndroid Build Coastguard Worker * <http://dev.w3.org/csswg/css-text/#line-breaking>. For example: 56*0e209d39SAndroid Build Coastguard Worker * "ja@lb=strict", "zh@lb=loose". 57*0e209d39SAndroid Build Coastguard Worker * <p> 58*0e209d39SAndroid Build Coastguard Worker * Sentence boundary analysis allows selection with correct 59*0e209d39SAndroid Build Coastguard Worker * interpretation of periods within numbers and abbreviations, and 60*0e209d39SAndroid Build Coastguard Worker * trailing punctuation marks such as quotation marks and parentheses. 61*0e209d39SAndroid Build Coastguard Worker * <p> 62*0e209d39SAndroid Build Coastguard Worker * Note: The locale keyword "ss" can be used to enable use of 63*0e209d39SAndroid Build Coastguard Worker * segmentation suppression data (preventing breaks in English after 64*0e209d39SAndroid Build Coastguard Worker * abbreviations such as "Mr." or "Est.", for example), as follows: 65*0e209d39SAndroid Build Coastguard Worker * "en@ss=standard". 66*0e209d39SAndroid Build Coastguard Worker * <p> 67*0e209d39SAndroid Build Coastguard Worker * Word boundary analysis is used by search and replace functions, as 68*0e209d39SAndroid Build Coastguard Worker * well as within text editing applications that allow the user to 69*0e209d39SAndroid Build Coastguard Worker * select words with a double click. Word selection provides correct 70*0e209d39SAndroid Build Coastguard Worker * interpretation of punctuation marks within and following 71*0e209d39SAndroid Build Coastguard Worker * words. Characters that are not part of a word, such as symbols or 72*0e209d39SAndroid Build Coastguard Worker * punctuation marks, have word-breaks on both sides. 
73*0e209d39SAndroid Build Coastguard Worker * <p> 74*0e209d39SAndroid Build Coastguard Worker * Character boundary analysis identifies the boundaries of 75*0e209d39SAndroid Build Coastguard Worker * "Extended Grapheme Clusters", which are groupings of codepoints 76*0e209d39SAndroid Build Coastguard Worker * that should be treated as character-like units for many text operations. 77*0e209d39SAndroid Build Coastguard Worker * Please see Unicode Standard Annex #29, Unicode Text Segmentation, 78*0e209d39SAndroid Build Coastguard Worker * http://www.unicode.org/reports/tr29/ for additional information 79*0e209d39SAndroid Build Coastguard Worker * on grapheme clusters and guidelines on their use. 80*0e209d39SAndroid Build Coastguard Worker * <p> 81*0e209d39SAndroid Build Coastguard Worker * Title boundary analysis locates all positions, 82*0e209d39SAndroid Build Coastguard Worker * typically starts of words, that should be set to Title Case 83*0e209d39SAndroid Build Coastguard Worker * when title casing the text. 84*0e209d39SAndroid Build Coastguard Worker * <p> 85*0e209d39SAndroid Build Coastguard Worker * The text boundary positions are found according to the rules 86*0e209d39SAndroid Build Coastguard Worker * described in Unicode Standard Annex #29, Text Boundaries, and 87*0e209d39SAndroid Build Coastguard Worker * Unicode Standard Annex #14, Line Breaking Properties. These 88*0e209d39SAndroid Build Coastguard Worker * are available at http://www.unicode.org/reports/tr14/ and 89*0e209d39SAndroid Build Coastguard Worker * http://www.unicode.org/reports/tr29/. 90*0e209d39SAndroid Build Coastguard Worker * <p> 91*0e209d39SAndroid Build Coastguard Worker * In addition to the plain C API defined in this header file, an 92*0e209d39SAndroid Build Coastguard Worker * object oriented C++ API with equivalent functionality is defined in the 93*0e209d39SAndroid Build Coastguard Worker * file brkiter.h. 
94*0e209d39SAndroid Build Coastguard Worker * <p> 95*0e209d39SAndroid Build Coastguard Worker * Code snippets illustrating the use of the Break Iterator APIs 96*0e209d39SAndroid Build Coastguard Worker * are available in the ICU User Guide, 97*0e209d39SAndroid Build Coastguard Worker * https://unicode-org.github.io/icu/userguide/boundaryanalysis/ 98*0e209d39SAndroid Build Coastguard Worker * and in the sample program icu/source/samples/break/break.cpp 99*0e209d39SAndroid Build Coastguard Worker */ 100*0e209d39SAndroid Build Coastguard Worker 101*0e209d39SAndroid Build Coastguard Worker /** The possible types of text boundaries. @stable ICU 2.0 */ 102*0e209d39SAndroid Build Coastguard Worker typedef enum UBreakIteratorType { 103*0e209d39SAndroid Build Coastguard Worker /** Character breaks @stable ICU 2.0 */ 104*0e209d39SAndroid Build Coastguard Worker UBRK_CHARACTER = 0, 105*0e209d39SAndroid Build Coastguard Worker /** Word breaks @stable ICU 2.0 */ 106*0e209d39SAndroid Build Coastguard Worker UBRK_WORD = 1, 107*0e209d39SAndroid Build Coastguard Worker /** Line breaks @stable ICU 2.0 */ 108*0e209d39SAndroid Build Coastguard Worker UBRK_LINE = 2, 109*0e209d39SAndroid Build Coastguard Worker /** Sentence breaks @stable ICU 2.0 */ 110*0e209d39SAndroid Build Coastguard Worker UBRK_SENTENCE = 3, 111*0e209d39SAndroid Build Coastguard Worker 112*0e209d39SAndroid Build Coastguard Worker #ifndef U_HIDE_DEPRECATED_API 113*0e209d39SAndroid Build Coastguard Worker /** 114*0e209d39SAndroid Build Coastguard Worker * Title Case breaks 115*0e209d39SAndroid Build Coastguard Worker * The iterator created using this type locates title boundaries as described for 116*0e209d39SAndroid Build Coastguard Worker * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration, 117*0e209d39SAndroid Build Coastguard Worker * please use Word Boundary iterator. 
118*0e209d39SAndroid Build Coastguard Worker * 119*0e209d39SAndroid Build Coastguard Worker * @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later. 120*0e209d39SAndroid Build Coastguard Worker */ 121*0e209d39SAndroid Build Coastguard Worker UBRK_TITLE = 4, 122*0e209d39SAndroid Build Coastguard Worker /** 123*0e209d39SAndroid Build Coastguard Worker * One more than the highest normal UBreakIteratorType value. 124*0e209d39SAndroid Build Coastguard Worker * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 125*0e209d39SAndroid Build Coastguard Worker */ 126*0e209d39SAndroid Build Coastguard Worker UBRK_COUNT = 5 127*0e209d39SAndroid Build Coastguard Worker #endif // U_HIDE_DEPRECATED_API 128*0e209d39SAndroid Build Coastguard Worker } UBreakIteratorType; 129*0e209d39SAndroid Build Coastguard Worker 130*0e209d39SAndroid Build Coastguard Worker /** Value indicating all text boundaries have been returned. 131*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 132*0e209d39SAndroid Build Coastguard Worker */ 133*0e209d39SAndroid Build Coastguard Worker #define UBRK_DONE ((int32_t) -1) 134*0e209d39SAndroid Build Coastguard Worker 135*0e209d39SAndroid Build Coastguard Worker 136*0e209d39SAndroid Build Coastguard Worker /** 137*0e209d39SAndroid Build Coastguard Worker * Enum constants for the word break tags returned by 138*0e209d39SAndroid Build Coastguard Worker * getRuleStatus(). A range of values is defined for each category of 139*0e209d39SAndroid Build Coastguard Worker * word, to allow for further subdivisions of a category in future releases. 140*0e209d39SAndroid Build Coastguard Worker * Applications should check for tag values falling within the range, rather 141*0e209d39SAndroid Build Coastguard Worker * than for single individual values. 
142*0e209d39SAndroid Build Coastguard Worker * 143*0e209d39SAndroid Build Coastguard Worker * The numeric values of all of these constants are stable (will not change). 144*0e209d39SAndroid Build Coastguard Worker * 145*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.2 146*0e209d39SAndroid Build Coastguard Worker */ 147*0e209d39SAndroid Build Coastguard Worker typedef enum UWordBreak { 148*0e209d39SAndroid Build Coastguard Worker /** Tag value for "words" that do not fit into any of other categories. 149*0e209d39SAndroid Build Coastguard Worker * Includes spaces and most punctuation. */ 150*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_NONE = 0, 151*0e209d39SAndroid Build Coastguard Worker /** Upper bound for tags for uncategorized words. */ 152*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_NONE_LIMIT = 100, 153*0e209d39SAndroid Build Coastguard Worker /** Tag value for words that appear to be numbers, lower limit. */ 154*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_NUMBER = 100, 155*0e209d39SAndroid Build Coastguard Worker /** Tag value for words that appear to be numbers, upper limit. */ 156*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_NUMBER_LIMIT = 200, 157*0e209d39SAndroid Build Coastguard Worker /** Tag value for words that contain letters, excluding 158*0e209d39SAndroid Build Coastguard Worker * hiragana, katakana or ideographic characters, lower limit. 
*/ 159*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_LETTER = 200, 160*0e209d39SAndroid Build Coastguard Worker /** Tag value for words containing letters, upper limit */ 161*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_LETTER_LIMIT = 300, 162*0e209d39SAndroid Build Coastguard Worker /** Tag value for words containing kana characters, lower limit */ 163*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_KANA = 300, 164*0e209d39SAndroid Build Coastguard Worker /** Tag value for words containing kana characters, upper limit */ 165*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_KANA_LIMIT = 400, 166*0e209d39SAndroid Build Coastguard Worker /** Tag value for words containing ideographic characters, lower limit */ 167*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_IDEO = 400, 168*0e209d39SAndroid Build Coastguard Worker /** Tag value for words containing ideographic characters, upper limit */ 169*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_IDEO_LIMIT = 500 170*0e209d39SAndroid Build Coastguard Worker } UWordBreak; 171*0e209d39SAndroid Build Coastguard Worker 172*0e209d39SAndroid Build Coastguard Worker /** 173*0e209d39SAndroid Build Coastguard Worker * Enum constants for the line break tags returned by getRuleStatus(). 174*0e209d39SAndroid Build Coastguard Worker * A range of values is defined for each category of 175*0e209d39SAndroid Build Coastguard Worker * word, to allow for further subdivisions of a category in future releases. 176*0e209d39SAndroid Build Coastguard Worker * Applications should check for tag values falling within the range, rather 177*0e209d39SAndroid Build Coastguard Worker * than for single individual values. 178*0e209d39SAndroid Build Coastguard Worker * 179*0e209d39SAndroid Build Coastguard Worker * The numeric values of all of these constants are stable (will not change). 
180*0e209d39SAndroid Build Coastguard Worker * 181*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.8 182*0e209d39SAndroid Build Coastguard Worker */ 183*0e209d39SAndroid Build Coastguard Worker typedef enum ULineBreakTag { 184*0e209d39SAndroid Build Coastguard Worker /** Tag value for soft line breaks, positions at which a line break 185*0e209d39SAndroid Build Coastguard Worker * is acceptable but not required */ 186*0e209d39SAndroid Build Coastguard Worker UBRK_LINE_SOFT = 0, 187*0e209d39SAndroid Build Coastguard Worker /** Upper bound for soft line breaks. */ 188*0e209d39SAndroid Build Coastguard Worker UBRK_LINE_SOFT_LIMIT = 100, 189*0e209d39SAndroid Build Coastguard Worker /** Tag value for a hard, or mandatory line break */ 190*0e209d39SAndroid Build Coastguard Worker UBRK_LINE_HARD = 100, 191*0e209d39SAndroid Build Coastguard Worker /** Upper bound for hard line breaks. */ 192*0e209d39SAndroid Build Coastguard Worker UBRK_LINE_HARD_LIMIT = 200 193*0e209d39SAndroid Build Coastguard Worker } ULineBreakTag; 194*0e209d39SAndroid Build Coastguard Worker 195*0e209d39SAndroid Build Coastguard Worker 196*0e209d39SAndroid Build Coastguard Worker 197*0e209d39SAndroid Build Coastguard Worker /** 198*0e209d39SAndroid Build Coastguard Worker * Enum constants for the sentence break tags returned by getRuleStatus(). 199*0e209d39SAndroid Build Coastguard Worker * A range of values is defined for each category of 200*0e209d39SAndroid Build Coastguard Worker * sentence, to allow for further subdivisions of a category in future releases. 201*0e209d39SAndroid Build Coastguard Worker * Applications should check for tag values falling within the range, rather 202*0e209d39SAndroid Build Coastguard Worker * than for single individual values. 203*0e209d39SAndroid Build Coastguard Worker * 204*0e209d39SAndroid Build Coastguard Worker * The numeric values of all of these constants are stable (will not change). 
205*0e209d39SAndroid Build Coastguard Worker * 206*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.8 207*0e209d39SAndroid Build Coastguard Worker */ 208*0e209d39SAndroid Build Coastguard Worker typedef enum USentenceBreakTag { 209*0e209d39SAndroid Build Coastguard Worker /** Tag value for for sentences ending with a sentence terminator 210*0e209d39SAndroid Build Coastguard Worker * ('.', '?', '!', etc.) character, possibly followed by a 211*0e209d39SAndroid Build Coastguard Worker * hard separator (CR, LF, PS, etc.) 212*0e209d39SAndroid Build Coastguard Worker */ 213*0e209d39SAndroid Build Coastguard Worker UBRK_SENTENCE_TERM = 0, 214*0e209d39SAndroid Build Coastguard Worker /** Upper bound for tags for sentences ended by sentence terminators. */ 215*0e209d39SAndroid Build Coastguard Worker UBRK_SENTENCE_TERM_LIMIT = 100, 216*0e209d39SAndroid Build Coastguard Worker /** Tag value for for sentences that do not contain an ending 217*0e209d39SAndroid Build Coastguard Worker * sentence terminator ('.', '?', '!', etc.) character, but 218*0e209d39SAndroid Build Coastguard Worker * are ended only by a hard separator (CR, LF, PS, etc.) or end of input. 219*0e209d39SAndroid Build Coastguard Worker */ 220*0e209d39SAndroid Build Coastguard Worker UBRK_SENTENCE_SEP = 100, 221*0e209d39SAndroid Build Coastguard Worker /** Upper bound for tags for sentences ended by a separator. */ 222*0e209d39SAndroid Build Coastguard Worker UBRK_SENTENCE_SEP_LIMIT = 200 223*0e209d39SAndroid Build Coastguard Worker /** Tag value for a hard, or mandatory line break */ 224*0e209d39SAndroid Build Coastguard Worker } USentenceBreakTag; 225*0e209d39SAndroid Build Coastguard Worker 226*0e209d39SAndroid Build Coastguard Worker 227*0e209d39SAndroid Build Coastguard Worker /** 228*0e209d39SAndroid Build Coastguard Worker * Open a new UBreakIterator for locating text boundaries for a specified locale. 
229*0e209d39SAndroid Build Coastguard Worker * A UBreakIterator may be used for detecting character, line, word, 230*0e209d39SAndroid Build Coastguard Worker * and sentence breaks in text. 231*0e209d39SAndroid Build Coastguard Worker * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD, 232*0e209d39SAndroid Build Coastguard Worker * UBRK_LINE, UBRK_SENTENCE 233*0e209d39SAndroid Build Coastguard Worker * @param locale The locale specifying the text-breaking conventions. Note that 234*0e209d39SAndroid Build Coastguard Worker * locale keys such as "lb" and "ss" may be used to modify text break behavior, 235*0e209d39SAndroid Build Coastguard Worker * see general discussion of BreakIterator C API. 236*0e209d39SAndroid Build Coastguard Worker * @param text The text to be iterated over. May be null, in which case ubrk_setText() is 237*0e209d39SAndroid Build Coastguard Worker * used to specify the text to be iterated. 238*0e209d39SAndroid Build Coastguard Worker * @param textLength The number of characters in text, or -1 if null-terminated. 239*0e209d39SAndroid Build Coastguard Worker * @param status A UErrorCode to receive any errors. 240*0e209d39SAndroid Build Coastguard Worker * @return A UBreakIterator for the specified locale. 
241*0e209d39SAndroid Build Coastguard Worker * @see ubrk_openRules 242*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 243*0e209d39SAndroid Build Coastguard Worker */ 244*0e209d39SAndroid Build Coastguard Worker U_CAPI UBreakIterator* U_EXPORT2 245*0e209d39SAndroid Build Coastguard Worker ubrk_open(UBreakIteratorType type, 246*0e209d39SAndroid Build Coastguard Worker const char *locale, 247*0e209d39SAndroid Build Coastguard Worker const UChar *text, 248*0e209d39SAndroid Build Coastguard Worker int32_t textLength, 249*0e209d39SAndroid Build Coastguard Worker UErrorCode *status); 250*0e209d39SAndroid Build Coastguard Worker 251*0e209d39SAndroid Build Coastguard Worker /** 252*0e209d39SAndroid Build Coastguard Worker * Open a new UBreakIterator for locating text boundaries using specified breaking rules. 253*0e209d39SAndroid Build Coastguard Worker * The rule syntax is ... (TBD) 254*0e209d39SAndroid Build Coastguard Worker * @param rules A set of rules specifying the text breaking conventions. 255*0e209d39SAndroid Build Coastguard Worker * @param rulesLength The number of characters in rules, or -1 if null-terminated. 256*0e209d39SAndroid Build Coastguard Worker * @param text The text to be iterated over. May be null, in which case ubrk_setText() is 257*0e209d39SAndroid Build Coastguard Worker * used to specify the text to be iterated. 258*0e209d39SAndroid Build Coastguard Worker * @param textLength The number of characters in text, or -1 if null-terminated. 259*0e209d39SAndroid Build Coastguard Worker * @param parseErr Receives position and context information for any syntax errors 260*0e209d39SAndroid Build Coastguard Worker * detected while parsing the rules. 261*0e209d39SAndroid Build Coastguard Worker * @param status A UErrorCode to receive any errors. 262*0e209d39SAndroid Build Coastguard Worker * @return A UBreakIterator for the specified rules. 
263*0e209d39SAndroid Build Coastguard Worker * @see ubrk_open 264*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.2 265*0e209d39SAndroid Build Coastguard Worker */ 266*0e209d39SAndroid Build Coastguard Worker U_CAPI UBreakIterator* U_EXPORT2 267*0e209d39SAndroid Build Coastguard Worker ubrk_openRules(const UChar *rules, 268*0e209d39SAndroid Build Coastguard Worker int32_t rulesLength, 269*0e209d39SAndroid Build Coastguard Worker const UChar *text, 270*0e209d39SAndroid Build Coastguard Worker int32_t textLength, 271*0e209d39SAndroid Build Coastguard Worker UParseError *parseErr, 272*0e209d39SAndroid Build Coastguard Worker UErrorCode *status); 273*0e209d39SAndroid Build Coastguard Worker 274*0e209d39SAndroid Build Coastguard Worker /** 275*0e209d39SAndroid Build Coastguard Worker * Open a new UBreakIterator for locating text boundaries using precompiled binary rules. 276*0e209d39SAndroid Build Coastguard Worker * Opening a UBreakIterator this way is substantially faster than using ubrk_openRules. 277*0e209d39SAndroid Build Coastguard Worker * Binary rules may be obtained using ubrk_getBinaryRules. The compiled rules are not 278*0e209d39SAndroid Build Coastguard Worker * compatible across different major versions of ICU, nor across platforms of different 279*0e209d39SAndroid Build Coastguard Worker * endianness or different base character set family (ASCII vs EBCDIC). 280*0e209d39SAndroid Build Coastguard Worker * @param binaryRules A set of compiled binary rules specifying the text breaking 281*0e209d39SAndroid Build Coastguard Worker * conventions. Ownership of the storage containing the compiled 282*0e209d39SAndroid Build Coastguard Worker * rules remains with the caller of this function. The compiled 283*0e209d39SAndroid Build Coastguard Worker * rules must not be modified or deleted during the life of the 284*0e209d39SAndroid Build Coastguard Worker * break iterator. 
285*0e209d39SAndroid Build Coastguard Worker * @param rulesLength The length of binaryRules in bytes; must be >= 0. 286*0e209d39SAndroid Build Coastguard Worker * @param text The text to be iterated over. May be null, in which case 287*0e209d39SAndroid Build Coastguard Worker * ubrk_setText() is used to specify the text to be iterated. 288*0e209d39SAndroid Build Coastguard Worker * @param textLength The number of characters in text, or -1 if null-terminated. 289*0e209d39SAndroid Build Coastguard Worker * @param status Pointer to UErrorCode to receive any errors. 290*0e209d39SAndroid Build Coastguard Worker * @return UBreakIterator for the specified rules. 291*0e209d39SAndroid Build Coastguard Worker * @see ubrk_getBinaryRules 292*0e209d39SAndroid Build Coastguard Worker * @stable ICU 59 293*0e209d39SAndroid Build Coastguard Worker */ 294*0e209d39SAndroid Build Coastguard Worker U_CAPI UBreakIterator* U_EXPORT2 295*0e209d39SAndroid Build Coastguard Worker ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength, 296*0e209d39SAndroid Build Coastguard Worker const UChar * text, int32_t textLength, 297*0e209d39SAndroid Build Coastguard Worker UErrorCode * status); 298*0e209d39SAndroid Build Coastguard Worker 299*0e209d39SAndroid Build Coastguard Worker #ifndef U_HIDE_DEPRECATED_API 300*0e209d39SAndroid Build Coastguard Worker 301*0e209d39SAndroid Build Coastguard Worker /** 302*0e209d39SAndroid Build Coastguard Worker * Thread safe cloning operation 303*0e209d39SAndroid Build Coastguard Worker * @param bi iterator to be cloned 304*0e209d39SAndroid Build Coastguard Worker * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br> 305*0e209d39SAndroid Build Coastguard Worker * user allocated space for the new clone. If NULL new memory will be allocated. 306*0e209d39SAndroid Build Coastguard Worker * If buffer is not large enough, new memory will be allocated. 
307*0e209d39SAndroid Build Coastguard Worker * Clients can use the U_BRK_SAFECLONE_BUFFERSIZE. 308*0e209d39SAndroid Build Coastguard Worker * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br> 309*0e209d39SAndroid Build Coastguard Worker * pointer to size of allocated space. 310*0e209d39SAndroid Build Coastguard Worker * If *pBufferSize == 0, a sufficient size for use in cloning will 311*0e209d39SAndroid Build Coastguard Worker * be returned ('pre-flighting') 312*0e209d39SAndroid Build Coastguard Worker * If *pBufferSize is not enough for a stack-based safe clone, 313*0e209d39SAndroid Build Coastguard Worker * new memory will be allocated. 314*0e209d39SAndroid Build Coastguard Worker * @param status to indicate whether the operation went on smoothly or there were errors 315*0e209d39SAndroid Build Coastguard Worker * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used 316*0e209d39SAndroid Build Coastguard Worker * if pBufferSize != NULL and any allocations were necessary 317*0e209d39SAndroid Build Coastguard Worker * @return pointer to the new clone 318*0e209d39SAndroid Build Coastguard Worker * @deprecated ICU 69 Use ubrk_clone() instead. 319*0e209d39SAndroid Build Coastguard Worker */ 320*0e209d39SAndroid Build Coastguard Worker U_DEPRECATED UBreakIterator * U_EXPORT2 321*0e209d39SAndroid Build Coastguard Worker ubrk_safeClone( 322*0e209d39SAndroid Build Coastguard Worker const UBreakIterator *bi, 323*0e209d39SAndroid Build Coastguard Worker void *stackBuffer, 324*0e209d39SAndroid Build Coastguard Worker int32_t *pBufferSize, 325*0e209d39SAndroid Build Coastguard Worker UErrorCode *status); 326*0e209d39SAndroid Build Coastguard Worker 327*0e209d39SAndroid Build Coastguard Worker #endif /* U_HIDE_DEPRECATED_API */ 328*0e209d39SAndroid Build Coastguard Worker 329*0e209d39SAndroid Build Coastguard Worker /** 330*0e209d39SAndroid Build Coastguard Worker * Thread safe cloning operation. 
331*0e209d39SAndroid Build Coastguard Worker * @param bi iterator to be cloned 332*0e209d39SAndroid Build Coastguard Worker * @param status to indicate whether the operation went on smoothly or there were errors 333*0e209d39SAndroid Build Coastguard Worker * @return pointer to the new clone 334*0e209d39SAndroid Build Coastguard Worker * @stable ICU 69 335*0e209d39SAndroid Build Coastguard Worker */ 336*0e209d39SAndroid Build Coastguard Worker U_CAPI UBreakIterator * U_EXPORT2 337*0e209d39SAndroid Build Coastguard Worker ubrk_clone(const UBreakIterator *bi, 338*0e209d39SAndroid Build Coastguard Worker UErrorCode *status); 339*0e209d39SAndroid Build Coastguard Worker 340*0e209d39SAndroid Build Coastguard Worker #ifndef U_HIDE_DEPRECATED_API 341*0e209d39SAndroid Build Coastguard Worker 342*0e209d39SAndroid Build Coastguard Worker /** 343*0e209d39SAndroid Build Coastguard Worker * A recommended size (in bytes) for the memory buffer to be passed to ubrk_saveClone(). 344*0e209d39SAndroid Build Coastguard Worker * @deprecated ICU 52. Do not rely on ubrk_safeClone() cloning into any provided buffer. 345*0e209d39SAndroid Build Coastguard Worker */ 346*0e209d39SAndroid Build Coastguard Worker #define U_BRK_SAFECLONE_BUFFERSIZE 1 347*0e209d39SAndroid Build Coastguard Worker 348*0e209d39SAndroid Build Coastguard Worker #endif /* U_HIDE_DEPRECATED_API */ 349*0e209d39SAndroid Build Coastguard Worker 350*0e209d39SAndroid Build Coastguard Worker /** 351*0e209d39SAndroid Build Coastguard Worker * Close a UBreakIterator. 352*0e209d39SAndroid Build Coastguard Worker * Once closed, a UBreakIterator may no longer be used. 353*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to close. 
354*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 355*0e209d39SAndroid Build Coastguard Worker */ 356*0e209d39SAndroid Build Coastguard Worker U_CAPI void U_EXPORT2 357*0e209d39SAndroid Build Coastguard Worker ubrk_close(UBreakIterator *bi); 358*0e209d39SAndroid Build Coastguard Worker 359*0e209d39SAndroid Build Coastguard Worker #if U_SHOW_CPLUSPLUS_API 360*0e209d39SAndroid Build Coastguard Worker 361*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 362*0e209d39SAndroid Build Coastguard Worker 363*0e209d39SAndroid Build Coastguard Worker /** 364*0e209d39SAndroid Build Coastguard Worker * \class LocalUBreakIteratorPointer 365*0e209d39SAndroid Build Coastguard Worker * "Smart pointer" class, closes a UBreakIterator via ubrk_close(). 366*0e209d39SAndroid Build Coastguard Worker * For most methods see the LocalPointerBase base class. 367*0e209d39SAndroid Build Coastguard Worker * 368*0e209d39SAndroid Build Coastguard Worker * @see LocalPointerBase 369*0e209d39SAndroid Build Coastguard Worker * @see LocalPointer 370*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.4 371*0e209d39SAndroid Build Coastguard Worker */ 372*0e209d39SAndroid Build Coastguard Worker U_DEFINE_LOCAL_OPEN_POINTER(LocalUBreakIteratorPointer, UBreakIterator, ubrk_close); 373*0e209d39SAndroid Build Coastguard Worker 374*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 375*0e209d39SAndroid Build Coastguard Worker 376*0e209d39SAndroid Build Coastguard Worker #endif 377*0e209d39SAndroid Build Coastguard Worker 378*0e209d39SAndroid Build Coastguard Worker /** 379*0e209d39SAndroid Build Coastguard Worker * Sets an existing iterator to point to a new piece of text. 380*0e209d39SAndroid Build Coastguard Worker * The break iterator retains a pointer to the supplied text. 381*0e209d39SAndroid Build Coastguard Worker * The caller must not modify or delete the text while the BreakIterator 382*0e209d39SAndroid Build Coastguard Worker * retains the reference. 
383*0e209d39SAndroid Build Coastguard Worker * 384*0e209d39SAndroid Build Coastguard Worker * @param bi The iterator to use 385*0e209d39SAndroid Build Coastguard Worker * @param text The text to be set 386*0e209d39SAndroid Build Coastguard Worker * @param textLength The length of the text 387*0e209d39SAndroid Build Coastguard Worker * @param status The error code 388*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 389*0e209d39SAndroid Build Coastguard Worker */ 390*0e209d39SAndroid Build Coastguard Worker U_CAPI void U_EXPORT2 391*0e209d39SAndroid Build Coastguard Worker ubrk_setText(UBreakIterator* bi, 392*0e209d39SAndroid Build Coastguard Worker const UChar* text, 393*0e209d39SAndroid Build Coastguard Worker int32_t textLength, 394*0e209d39SAndroid Build Coastguard Worker UErrorCode* status); 395*0e209d39SAndroid Build Coastguard Worker 396*0e209d39SAndroid Build Coastguard Worker 397*0e209d39SAndroid Build Coastguard Worker /** 398*0e209d39SAndroid Build Coastguard Worker * Sets an existing iterator to point to a new piece of text. 399*0e209d39SAndroid Build Coastguard Worker * 400*0e209d39SAndroid Build Coastguard Worker * All index positions returned by break iterator functions are 401*0e209d39SAndroid Build Coastguard Worker * native indices from the UText. For example, when breaking UTF-8 402*0e209d39SAndroid Build Coastguard Worker * encoded text, the break positions returned by \ref ubrk_next, \ref ubrk_previous, etc. 403*0e209d39SAndroid Build Coastguard Worker * will be UTF-8 string indices, not UTF-16 positions. 404*0e209d39SAndroid Build Coastguard Worker * 405*0e209d39SAndroid Build Coastguard Worker * @param bi The iterator to use 406*0e209d39SAndroid Build Coastguard Worker * @param text The text to be set. 407*0e209d39SAndroid Build Coastguard Worker * This function makes a shallow clone of the supplied UText. 
This means 408*0e209d39SAndroid Build Coastguard Worker * that the caller is free to immediately close or otherwise reuse the 409*0e209d39SAndroid Build Coastguard Worker * UText that was passed as a parameter, but that the underlying text itself 410*0e209d39SAndroid Build Coastguard Worker * must not be altered while being referenced by the break iterator. 411*0e209d39SAndroid Build Coastguard Worker * @param status The error code 412*0e209d39SAndroid Build Coastguard Worker * @stable ICU 3.4 413*0e209d39SAndroid Build Coastguard Worker */ 414*0e209d39SAndroid Build Coastguard Worker U_CAPI void U_EXPORT2 415*0e209d39SAndroid Build Coastguard Worker ubrk_setUText(UBreakIterator* bi, 416*0e209d39SAndroid Build Coastguard Worker UText* text, 417*0e209d39SAndroid Build Coastguard Worker UErrorCode* status); 418*0e209d39SAndroid Build Coastguard Worker 419*0e209d39SAndroid Build Coastguard Worker 420*0e209d39SAndroid Build Coastguard Worker 421*0e209d39SAndroid Build Coastguard Worker /** 422*0e209d39SAndroid Build Coastguard Worker * Determine the most recently-returned text boundary. 423*0e209d39SAndroid Build Coastguard Worker * 424*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to use. 425*0e209d39SAndroid Build Coastguard Worker * @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous, 426*0e209d39SAndroid Build Coastguard Worker * \ref ubrk_first, or \ref ubrk_last. 427*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 428*0e209d39SAndroid Build Coastguard Worker */ 429*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 430*0e209d39SAndroid Build Coastguard Worker ubrk_current(const UBreakIterator *bi); 431*0e209d39SAndroid Build Coastguard Worker 432*0e209d39SAndroid Build Coastguard Worker /** 433*0e209d39SAndroid Build Coastguard Worker * Advance the iterator to the boundary following the current boundary. 
434*0e209d39SAndroid Build Coastguard Worker * 435*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to use. 436*0e209d39SAndroid Build Coastguard Worker * @return The character index of the next text boundary, or UBRK_DONE 437*0e209d39SAndroid Build Coastguard Worker * if all text boundaries have been returned. 438*0e209d39SAndroid Build Coastguard Worker * @see ubrk_previous 439*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 440*0e209d39SAndroid Build Coastguard Worker */ 441*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 442*0e209d39SAndroid Build Coastguard Worker ubrk_next(UBreakIterator *bi); 443*0e209d39SAndroid Build Coastguard Worker 444*0e209d39SAndroid Build Coastguard Worker /** 445*0e209d39SAndroid Build Coastguard Worker * Set the iterator position to the boundary preceding the current boundary. 446*0e209d39SAndroid Build Coastguard Worker * 447*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to use. 448*0e209d39SAndroid Build Coastguard Worker * @return The character index of the preceding text boundary, or UBRK_DONE 449*0e209d39SAndroid Build Coastguard Worker * if all text boundaries have been returned. 450*0e209d39SAndroid Build Coastguard Worker * @see ubrk_next 451*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 452*0e209d39SAndroid Build Coastguard Worker */ 453*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 454*0e209d39SAndroid Build Coastguard Worker ubrk_previous(UBreakIterator *bi); 455*0e209d39SAndroid Build Coastguard Worker 456*0e209d39SAndroid Build Coastguard Worker /** 457*0e209d39SAndroid Build Coastguard Worker * Set the iterator position to zero, the start of the text being scanned. 458*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to use. 459*0e209d39SAndroid Build Coastguard Worker * @return The new iterator position (zero). 
460*0e209d39SAndroid Build Coastguard Worker * @see ubrk_last 461*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 462*0e209d39SAndroid Build Coastguard Worker */ 463*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 464*0e209d39SAndroid Build Coastguard Worker ubrk_first(UBreakIterator *bi); 465*0e209d39SAndroid Build Coastguard Worker 466*0e209d39SAndroid Build Coastguard Worker /** 467*0e209d39SAndroid Build Coastguard Worker * Set the iterator position to the index immediately <EM>beyond</EM> the last character in the text being scanned. 468*0e209d39SAndroid Build Coastguard Worker * This is not the same as the last character. 469*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to use. 470*0e209d39SAndroid Build Coastguard Worker * @return The character offset immediately <EM>beyond</EM> the last character in the 471*0e209d39SAndroid Build Coastguard Worker * text being scanned. 472*0e209d39SAndroid Build Coastguard Worker * @see ubrk_first 473*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 474*0e209d39SAndroid Build Coastguard Worker */ 475*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 476*0e209d39SAndroid Build Coastguard Worker ubrk_last(UBreakIterator *bi); 477*0e209d39SAndroid Build Coastguard Worker 478*0e209d39SAndroid Build Coastguard Worker /** 479*0e209d39SAndroid Build Coastguard Worker * Set the iterator position to the first boundary preceding the specified offset. 480*0e209d39SAndroid Build Coastguard Worker * The new position is always smaller than offset, or UBRK_DONE. 481*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to use. 482*0e209d39SAndroid Build Coastguard Worker * @param offset The offset to begin scanning. 483*0e209d39SAndroid Build Coastguard Worker * @return The text boundary preceding offset, or UBRK_DONE. 
484*0e209d39SAndroid Build Coastguard Worker * @see ubrk_following 485*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 486*0e209d39SAndroid Build Coastguard Worker */ 487*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 488*0e209d39SAndroid Build Coastguard Worker ubrk_preceding(UBreakIterator *bi, 489*0e209d39SAndroid Build Coastguard Worker int32_t offset); 490*0e209d39SAndroid Build Coastguard Worker 491*0e209d39SAndroid Build Coastguard Worker /** 492*0e209d39SAndroid Build Coastguard Worker * Advance the iterator to the first boundary following the specified offset. 493*0e209d39SAndroid Build Coastguard Worker * The value returned is always greater than offset, or UBRK_DONE. 494*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to use. 495*0e209d39SAndroid Build Coastguard Worker * @param offset The offset to begin scanning. 496*0e209d39SAndroid Build Coastguard Worker * @return The text boundary following offset, or UBRK_DONE. 497*0e209d39SAndroid Build Coastguard Worker * @see ubrk_preceding 498*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 499*0e209d39SAndroid Build Coastguard Worker */ 500*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 501*0e209d39SAndroid Build Coastguard Worker ubrk_following(UBreakIterator *bi, 502*0e209d39SAndroid Build Coastguard Worker int32_t offset); 503*0e209d39SAndroid Build Coastguard Worker 504*0e209d39SAndroid Build Coastguard Worker /** 505*0e209d39SAndroid Build Coastguard Worker * Get a locale for which text breaking information is available. 506*0e209d39SAndroid Build Coastguard Worker * A UBreakIterator in a locale returned by this function will perform the correct 507*0e209d39SAndroid Build Coastguard Worker * text breaking for the locale. 508*0e209d39SAndroid Build Coastguard Worker * @param index The index of the desired locale. 
509*0e209d39SAndroid Build Coastguard Worker * @return A locale for which text breaking information is available, or 0 if none. 510*0e209d39SAndroid Build Coastguard Worker * @see ubrk_countAvailable 511*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 512*0e209d39SAndroid Build Coastguard Worker */ 513*0e209d39SAndroid Build Coastguard Worker U_CAPI const char* U_EXPORT2 514*0e209d39SAndroid Build Coastguard Worker ubrk_getAvailable(int32_t index); 515*0e209d39SAndroid Build Coastguard Worker 516*0e209d39SAndroid Build Coastguard Worker /** 517*0e209d39SAndroid Build Coastguard Worker * Determine how many locales have text breaking information available. 518*0e209d39SAndroid Build Coastguard Worker * This function is most useful for determining the loop ending condition for 519*0e209d39SAndroid Build Coastguard Worker * calls to \ref ubrk_getAvailable. 520*0e209d39SAndroid Build Coastguard Worker * @return The number of locales for which text breaking information is available. 521*0e209d39SAndroid Build Coastguard Worker * @see ubrk_getAvailable 522*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 523*0e209d39SAndroid Build Coastguard Worker */ 524*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 525*0e209d39SAndroid Build Coastguard Worker ubrk_countAvailable(void); 526*0e209d39SAndroid Build Coastguard Worker 527*0e209d39SAndroid Build Coastguard Worker 528*0e209d39SAndroid Build Coastguard Worker /** 529*0e209d39SAndroid Build Coastguard Worker * Returns true if the specified position is a boundary position. As a side 530*0e209d39SAndroid Build Coastguard Worker * effect, leaves the iterator pointing to the first boundary position at 531*0e209d39SAndroid Build Coastguard Worker * or after "offset". 532*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to use. 533*0e209d39SAndroid Build Coastguard Worker * @param offset the offset to check.
534*0e209d39SAndroid Build Coastguard Worker * @return True if "offset" is a boundary position. 535*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 536*0e209d39SAndroid Build Coastguard Worker */ 537*0e209d39SAndroid Build Coastguard Worker U_CAPI UBool U_EXPORT2 538*0e209d39SAndroid Build Coastguard Worker ubrk_isBoundary(UBreakIterator *bi, int32_t offset); 539*0e209d39SAndroid Build Coastguard Worker 540*0e209d39SAndroid Build Coastguard Worker /** 541*0e209d39SAndroid Build Coastguard Worker * Return the status from the break rule that determined the most recently 542*0e209d39SAndroid Build Coastguard Worker * returned break position. The values appear in the rule source 543*0e209d39SAndroid Build Coastguard Worker * within brackets, {123}, for example. For rules that do not specify a 544*0e209d39SAndroid Build Coastguard Worker * status, a default value of 0 is returned. 545*0e209d39SAndroid Build Coastguard Worker * <p> 546*0e209d39SAndroid Build Coastguard Worker * For word break iterators, the possible values are defined in enum UWordBreak. * @param bi The break iterator to use. * @return The status from the break rule that determined the most recently returned break position, or 0 for rules that do not specify a status. 547*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.2 548*0e209d39SAndroid Build Coastguard Worker */ 549*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 550*0e209d39SAndroid Build Coastguard Worker ubrk_getRuleStatus(UBreakIterator *bi); 551*0e209d39SAndroid Build Coastguard Worker 552*0e209d39SAndroid Build Coastguard Worker /** 553*0e209d39SAndroid Build Coastguard Worker * Get the statuses from the break rules that determined the most recently 554*0e209d39SAndroid Build Coastguard Worker * returned break position. The values appear in the rule source 555*0e209d39SAndroid Build Coastguard Worker * within brackets, {123}, for example. The default status value for rules 556*0e209d39SAndroid Build Coastguard Worker * that do not explicitly provide one is zero.
557*0e209d39SAndroid Build Coastguard Worker * <p> 558*0e209d39SAndroid Build Coastguard Worker * For word break iterators, the possible values are defined in enum UWordBreak. 559*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to use 560*0e209d39SAndroid Build Coastguard Worker * @param fillInVec an array to be filled in with the status values. 561*0e209d39SAndroid Build Coastguard Worker * @param capacity the length of the supplied vector. A length of zero causes 562*0e209d39SAndroid Build Coastguard Worker * the function to return the number of status values, in the 563*0e209d39SAndroid Build Coastguard Worker * normal way, without attempting to store any values. 564*0e209d39SAndroid Build Coastguard Worker * @param status receives error codes. 565*0e209d39SAndroid Build Coastguard Worker * @return The number of rule status values from rules that determined 566*0e209d39SAndroid Build Coastguard Worker * the most recent boundary returned by the break iterator. 567*0e209d39SAndroid Build Coastguard Worker * @stable ICU 3.0 568*0e209d39SAndroid Build Coastguard Worker */ 569*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 570*0e209d39SAndroid Build Coastguard Worker ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status); 571*0e209d39SAndroid Build Coastguard Worker 572*0e209d39SAndroid Build Coastguard Worker /** 573*0e209d39SAndroid Build Coastguard Worker * Return the locale of the break iterator. You can choose between the valid and 574*0e209d39SAndroid Build Coastguard Worker * the actual locale. 
575*0e209d39SAndroid Build Coastguard Worker * @param bi break iterator 576*0e209d39SAndroid Build Coastguard Worker * @param type locale type (valid or actual) 577*0e209d39SAndroid Build Coastguard Worker * @param status error code 578*0e209d39SAndroid Build Coastguard Worker * @return locale string 579*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.8 580*0e209d39SAndroid Build Coastguard Worker */ 581*0e209d39SAndroid Build Coastguard Worker U_CAPI const char* U_EXPORT2 582*0e209d39SAndroid Build Coastguard Worker ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status); 583*0e209d39SAndroid Build Coastguard Worker 584*0e209d39SAndroid Build Coastguard Worker /** 585*0e209d39SAndroid Build Coastguard Worker * Set the subject text string upon which the break iterator is operating 586*0e209d39SAndroid Build Coastguard Worker * without changing any other aspect of the state. 587*0e209d39SAndroid Build Coastguard Worker * The new and previous text strings must have the same content. 588*0e209d39SAndroid Build Coastguard Worker * 589*0e209d39SAndroid Build Coastguard Worker * This function is intended for use in environments where ICU is operating on 590*0e209d39SAndroid Build Coastguard Worker * strings that may move around in memory. It provides a mechanism for notifying 591*0e209d39SAndroid Build Coastguard Worker * ICU that the string has been relocated, and providing a new UText to access the 592*0e209d39SAndroid Build Coastguard Worker * string in its new position. 593*0e209d39SAndroid Build Coastguard Worker * 594*0e209d39SAndroid Build Coastguard Worker * Note that the break iterator never copies the underlying text 595*0e209d39SAndroid Build Coastguard Worker * of a string being processed, but always operates directly on the original text 596*0e209d39SAndroid Build Coastguard Worker * provided by the user. 
Refreshing simply drops the references to the old text 597*0e209d39SAndroid Build Coastguard Worker * and replaces them with references to the new. 598*0e209d39SAndroid Build Coastguard Worker * 599*0e209d39SAndroid Build Coastguard Worker * Caution: this function is normally used only by very specialized 600*0e209d39SAndroid Build Coastguard Worker * system-level code. One example use case is with garbage collection 601*0e209d39SAndroid Build Coastguard Worker * that moves the text in memory. 602*0e209d39SAndroid Build Coastguard Worker * 603*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator. 604*0e209d39SAndroid Build Coastguard Worker * @param text The new (moved) text string. 605*0e209d39SAndroid Build Coastguard Worker * @param status Receives errors detected by this function. 606*0e209d39SAndroid Build Coastguard Worker * 607*0e209d39SAndroid Build Coastguard Worker * @stable ICU 49 608*0e209d39SAndroid Build Coastguard Worker */ 609*0e209d39SAndroid Build Coastguard Worker U_CAPI void U_EXPORT2 610*0e209d39SAndroid Build Coastguard Worker ubrk_refreshUText(UBreakIterator *bi, 611*0e209d39SAndroid Build Coastguard Worker UText *text, 612*0e209d39SAndroid Build Coastguard Worker UErrorCode *status); 613*0e209d39SAndroid Build Coastguard Worker 614*0e209d39SAndroid Build Coastguard Worker 615*0e209d39SAndroid Build Coastguard Worker /** 616*0e209d39SAndroid Build Coastguard Worker * Get a compiled binary version of the rules specifying the behavior of a UBreakIterator. 617*0e209d39SAndroid Build Coastguard Worker * The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator 618*0e209d39SAndroid Build Coastguard Worker * more quickly than using ubrk_openRules. 
The compiled rules are not compatible across 619*0e209d39SAndroid Build Coastguard Worker * different major versions of ICU, nor across platforms of different endianness or 620*0e209d39SAndroid Build Coastguard Worker * different base character set family (ASCII vs EBCDIC). Supports preflighting (with 621*0e209d39SAndroid Build Coastguard Worker * binaryRules=NULL and rulesCapacity=0) to get the rules length without copying them to 622*0e209d39SAndroid Build Coastguard Worker * the binaryRules buffer. However, whether preflighting or not, if the actual length 623*0e209d39SAndroid Build Coastguard Worker * is greater than INT32_MAX, then the function returns 0 and sets *status to 624*0e209d39SAndroid Build Coastguard Worker * U_INDEX_OUTOFBOUNDS_ERROR. 625*0e209d39SAndroid Build Coastguard Worker * 626*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to use. 627*0e209d39SAndroid Build Coastguard Worker * @param binaryRules Buffer to receive the compiled binary rules; set to NULL for 628*0e209d39SAndroid Build Coastguard Worker * preflighting. 629*0e209d39SAndroid Build Coastguard Worker * @param rulesCapacity Capacity (in bytes) of the binaryRules buffer; set to 0 for 630*0e209d39SAndroid Build Coastguard Worker * preflighting. Must be >= 0. 631*0e209d39SAndroid Build Coastguard Worker * @param status Pointer to UErrorCode to receive any errors, such as 632*0e209d39SAndroid Build Coastguard Worker * U_BUFFER_OVERFLOW_ERROR, U_INDEX_OUTOFBOUNDS_ERROR, or 633*0e209d39SAndroid Build Coastguard Worker * U_ILLEGAL_ARGUMENT_ERROR. 634*0e209d39SAndroid Build Coastguard Worker * @return The actual byte length of the binary rules, if <= INT32_MAX; 635*0e209d39SAndroid Build Coastguard Worker * otherwise 0. If not preflighting and this is larger than 636*0e209d39SAndroid Build Coastguard Worker * rulesCapacity, *status will be set to an error.
637*0e209d39SAndroid Build Coastguard Worker * @see ubrk_openBinaryRules 638*0e209d39SAndroid Build Coastguard Worker * @stable ICU 59 639*0e209d39SAndroid Build Coastguard Worker */ 640*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 641*0e209d39SAndroid Build Coastguard Worker ubrk_getBinaryRules(UBreakIterator *bi, 642*0e209d39SAndroid Build Coastguard Worker uint8_t * binaryRules, int32_t rulesCapacity, 643*0e209d39SAndroid Build Coastguard Worker UErrorCode * status); 644*0e209d39SAndroid Build Coastguard Worker 645*0e209d39SAndroid Build Coastguard Worker #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 646*0e209d39SAndroid Build Coastguard Worker 647*0e209d39SAndroid Build Coastguard Worker #endif /* UBRK_H */ 648