1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (c) 1996-2015, International Business Machines Corporation and others.
6 * All Rights Reserved.
7 *******************************************************************************
8 */
9 
10 #ifndef UCOL_H
11 #define UCOL_H
12 
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_COLLATION
16 
17 #include "unicode/parseerr.h"
18 #include "unicode/uloc.h"
19 #include "unicode/uscript.h"
20 
21 #if U_SHOW_CPLUSPLUS_API
22 #include "unicode/localpointer.h"
23 #endif   // U_SHOW_CPLUSPLUS_API
24 
25 /**
26  * @addtogroup icu4c ICU4C
27  * @{
28  * \file
29  * \brief C API: Collator
30  *
31  * <h2> Collator C API </h2>
32  *
33  * The C API for Collator performs locale-sensitive
34  * string comparison. You use this service to build
35  * searching and sorting routines for natural language text.
36  * <p>
37  * For more information about the collation service see
38  * <a href="https://unicode-org.github.io/icu/userguide/collation">the User Guide</a>.
39  * <p>
40  * Collation service provides correct sorting orders for most locales supported in ICU.
41  * If specific data for a locale is not available, the order eventually falls back
42  * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>.
43  * <p>
44  * Sort ordering may be customized by providing your own set of rules. For more on
45  * this subject see the <a href="https://unicode-org.github.io/icu/userguide/collation/customization">
46  * Collation Customization</a> section of the User Guide.
47  * <p>
48  * @see         UCollationResult
49  * @see         UNormalizationMode
50  * @see         UCollationStrength
51  * @see         UCollationElements
52  */
53 
54 /** A collator. Only forward-declared here; no members are visible in this part of the header.
55 *  For usage in C programs.
56 */
57 struct UCollator;
58 /** Structure representing a collator object instance.
59  * \xrefitem stable "Stable" "Stable List" ICU 2.0
60  */
61 typedef struct UCollator UCollator;
62 
63 
64 /**
65  * Possible values for a comparison result, as returned by ucol_strcoll().
66  * UCOL_LESS is returned if the source string compares less than the
67  * target string.
68  * UCOL_EQUAL is returned if the source string compares equal to the
69  * target string.
70  * UCOL_GREATER is returned if the source string compares greater than
71  * the target string.
72  * @see ucol_strcoll()
73  * @see ucol_strcollUTF8()
74  * \xrefitem stable "Stable" "Stable List" ICU 2.0
75  */
76 typedef enum UCollationResult : int32_t {
77   /** source string == target string */
78   UCOL_EQUAL    = 0,
79   /** source string > target string */
80   UCOL_GREATER    = 1,
81   /** source string < target string */
82   UCOL_LESS    = -1
83 } UCollationResult ;
84 
85 
86 /** Enum containing attribute values for controlling collation behavior.
87  * Here are all the allowable values. Not every attribute can take every value. The only
88  * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined
89  * value for that locale.
90  * \xrefitem stable "Stable" "Stable List" ICU 2.0
91  */
92 typedef enum UColAttributeValue : int32_t {
93   /** accepted by most attributes */
94   UCOL_DEFAULT = -1,
95 
96   /** Primary collation strength */
97   UCOL_PRIMARY = 0,
98   /** Secondary collation strength */
99   UCOL_SECONDARY = 1,
100   /** Tertiary collation strength */
101   UCOL_TERTIARY = 2,
102   /** Default collation strength */
103   UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY,
104   UCOL_CE_STRENGTH_LIMIT, /**< Implicit value UCOL_DEFAULT_STRENGTH + 1, i.e. 3; numerically equal to UCOL_QUATERNARY. */
105   /** Quaternary collation strength */
106   UCOL_QUATERNARY=3,
107   /** Identical collation strength */
108   UCOL_IDENTICAL=15,
109   UCOL_STRENGTH_LIMIT, /**< Implicit value UCOL_IDENTICAL + 1, i.e. 16; numerically equal to UCOL_OFF. */
110 
111   /** Turn the feature off - works for UCOL_FRENCH_COLLATION,
112       UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
113       & UCOL_DECOMPOSITION_MODE*/
114   UCOL_OFF = 16,
115   /** Turn the feature on - works for UCOL_FRENCH_COLLATION,
116       UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
117       & UCOL_DECOMPOSITION_MODE*/
118   UCOL_ON = 17,
119 
120   /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */
121   UCOL_SHIFTED = 20,
122   /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */
123   UCOL_NON_IGNORABLE = 21,
124 
125   /** Valid for UCOL_CASE_FIRST -
126       lower case sorts before upper case */
127   UCOL_LOWER_FIRST = 24,
128   /** Valid for UCOL_CASE_FIRST - upper case sorts before lower case */
129   UCOL_UPPER_FIRST = 25,
130 
131 #ifndef U_HIDE_DEPRECATED_API
132     /**
133      * One more than the highest normal UColAttributeValue value (UCOL_UPPER_FIRST + 1 as declared here).
134      * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420.
135      */
136   UCOL_ATTRIBUTE_VALUE_COUNT
137 #endif  /* U_HIDE_DEPRECATED_API */
138 } UColAttributeValue;
139 
140 /**
141  * Enum containing the codes for reordering segments of the collation table that are not script
142  * codes. These reordering codes are to be used in conjunction with the script codes.
143  * @see ucol_getReorderCodes
144  * @see ucol_setReorderCodes
145  * @see ucol_getEquivalentReorderCodes
146  * @see UScriptCode
147  * \xrefitem stable "Stable" "Stable List" ICU 4.8
148  */
149  typedef enum UColReorderCode : int32_t {
150    /**
151     * A special reordering code that is used to specify the default
152     * reordering codes for a locale.
153     * \xrefitem stable "Stable" "Stable List" ICU 4.8
154     */
155     UCOL_REORDER_CODE_DEFAULT       = -1,
156    /**
157     * A special reordering code that is used to specify no reordering codes.
158     * \xrefitem stable "Stable" "Stable List" ICU 4.8
159     */
160     UCOL_REORDER_CODE_NONE          = USCRIPT_UNKNOWN,
161    /**
162     * A special reordering code that is used to specify all other codes used for
163     * reordering except for the codes listed as UColReorderCode values and those
164     * listed explicitly in a reordering. As declared here, it has the same numeric value as UCOL_REORDER_CODE_NONE (both are USCRIPT_UNKNOWN).
165     * \xrefitem stable "Stable" "Stable List" ICU 4.8
166     */
167     UCOL_REORDER_CODE_OTHERS        = USCRIPT_UNKNOWN,
168    /**
169     * Characters with the space property.
170     * This is equivalent to the rule value "space".
171     * \xrefitem stable "Stable" "Stable List" ICU 4.8
172     */
173     UCOL_REORDER_CODE_SPACE         = 0x1000,
174    /**
175     * The first entry in the enumeration of reordering groups. This is intended for use in
176     * range checking and enumeration of the reorder codes.
177     * \xrefitem stable "Stable" "Stable List" ICU 4.8
178     */
179     UCOL_REORDER_CODE_FIRST         = UCOL_REORDER_CODE_SPACE,
180    /**
181     * Characters with the punctuation property.
182     * This is equivalent to the rule value "punct".
183     * \xrefitem stable "Stable" "Stable List" ICU 4.8
184     */
185     UCOL_REORDER_CODE_PUNCTUATION   = 0x1001,
186    /**
187     * Characters with the symbol property.
188     * This is equivalent to the rule value "symbol".
189     * \xrefitem stable "Stable" "Stable List" ICU 4.8
190     */
191     UCOL_REORDER_CODE_SYMBOL        = 0x1002,
192    /**
193     * Characters with the currency property.
194     * This is equivalent to the rule value "currency".
195     * \xrefitem stable "Stable" "Stable List" ICU 4.8
196     */
197     UCOL_REORDER_CODE_CURRENCY      = 0x1003,
198    /**
199     * Characters with the digit property.
200     * This is equivalent to the rule value "digit".
201     * \xrefitem stable "Stable" "Stable List" ICU 4.8
202     */
203     UCOL_REORDER_CODE_DIGIT         = 0x1004,
204 #ifndef U_HIDE_DEPRECATED_API
205     /**
206      * One more than the highest normal UColReorderCode value.
207      * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420.
208      */
209     UCOL_REORDER_CODE_LIMIT         = 0x1005
210 #endif  /* U_HIDE_DEPRECATED_API */
211 } UColReorderCode;
212 
213 /**
214  * Base letter represents a primary difference.  Set comparison
215  * level to UCOL_PRIMARY to ignore secondary and tertiary differences.
216  * Use this to set the strength of a Collator object.
217  * Example of primary difference, "abc" &lt; "abd"
218  *
219  * Diacritical differences on the same base letter represent a secondary
220  * difference.  Set comparison level to UCOL_SECONDARY to ignore tertiary
221  * differences. Use this to set the strength of a Collator object.
222  * Example of secondary difference, "&auml;" &gt;&gt; "a".
223  *
224  * Uppercase and lowercase versions of the same character represent a
225  * tertiary difference.  Set comparison level to UCOL_TERTIARY to include
226  * all comparison differences. Use this to set the strength of a Collator
227  * object.
228  * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
229  *
230  * Two characters are considered "identical" when they have the same
231  * Unicode spellings; this corresponds to UCOL_IDENTICAL.
232  * For example, "&auml;" == "&auml;".
233  *
234  * UCollationStrength is also used to determine the strength of sort keys
235  * generated from UCollator objects.
236  * These values can now be found in the UColAttributeValue enum.
237  * \xrefitem stable "Stable" "Stable List" ICU 2.0
238  **/
239 typedef UColAttributeValue UCollationStrength;
240 
241 /** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT
242  * value, as well as the values specific to each one.
243  * \xrefitem stable "Stable" "Stable List" ICU 2.0
244  */
245 typedef enum UColAttribute : int32_t {
246      /** Attribute for direction of secondary weights - used in Canadian French.
247       * Acceptable values are UCOL_ON, which results in secondary weights
248       * being considered backwards and UCOL_OFF which treats secondary
249       * weights in the order they appear.
250       * \xrefitem stable "Stable" "Stable List" ICU 2.0
251       */
252      UCOL_FRENCH_COLLATION,
253      /** Attribute for handling variable elements.
254       * Acceptable values are UCOL_NON_IGNORABLE
255       * which treats all the codepoints with non-ignorable
256       * primary weights in the same way,
257       * and UCOL_SHIFTED which causes codepoints with primary
258       * weights that are equal or below the variable top value
259       * to be ignored on primary level and moved to the quaternary
260       * level. The default setting in a Collator object depends on the
261       * locale data loaded from the resources. For most locales, the
262       * default is UCOL_NON_IGNORABLE, but for others, such as "th",
263       * the default could be UCOL_SHIFTED.
264       * \xrefitem stable "Stable" "Stable List" ICU 2.0
265       */
266      UCOL_ALTERNATE_HANDLING,
267      /** Controls the ordering of upper and lower case letters.
268       * Acceptable values are UCOL_OFF, which orders
269       * upper and lower case letters in accordance to their tertiary
270       * weights, UCOL_UPPER_FIRST which forces upper case letters to
271       * sort before lower case letters, and UCOL_LOWER_FIRST which does
272       * the opposite. The default setting in a Collator object depends on the
273       * locale data loaded from the resources. For most locales, the
274       * default is UCOL_OFF, but for others, such as "da" or "mt",
275       * the default could be UCOL_UPPER_FIRST.
276       * \xrefitem stable "Stable" "Stable List" ICU 2.0
277       */
278      UCOL_CASE_FIRST,
279      /** Controls whether an extra case level (positioned before the third
280       * level) is generated or not. Acceptable values are UCOL_OFF,
281       * when case level is not generated, and UCOL_ON which causes the case
282       * level to be generated. Contents of the case level are affected by
283       * the value of UCOL_CASE_FIRST attribute. A simple way to ignore
284       * accent differences in a string is to set the strength to UCOL_PRIMARY
285       * and enable case level. The default setting in a Collator object depends
286       * on the locale data loaded from the resources.
287       * \xrefitem stable "Stable" "Stable List" ICU 2.0
288       */
289      UCOL_CASE_LEVEL,
290      /** Controls whether the normalization check and necessary normalizations
291       * are performed. When set to UCOL_OFF no normalization check
292       * is performed. The correctness of the result is guaranteed only if the
293       * input data is in so-called FCD form (see users manual for more info).
294       * When set to UCOL_ON, an incremental check is performed to see whether
295       * the input data is in the FCD form. If the data is not in the FCD form,
296       * incremental NFD normalization is performed. The default setting in a
297       * Collator object depends on the locale data loaded from the resources.
298       * For many locales, the default is UCOL_OFF, but for others, such as "hi",
299       * "vi", or "bn", the default could be UCOL_ON.
300       * \xrefitem stable "Stable" "Stable List" ICU 2.0
301       */
302      UCOL_NORMALIZATION_MODE,
303      /** An alias for UCOL_NORMALIZATION_MODE attribute.
304       * \xrefitem stable "Stable" "Stable List" ICU 2.0
305       */
306      UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE,
307      /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY,
308       * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength
309       * for most locales (except Japanese) is tertiary.
310       *
311       * Quaternary strength
312       * is useful when combined with shifted setting for alternate handling
313       * attribute and for JIS X 4061 collation, when it is used to distinguish
314       * between Katakana and Hiragana.
315       * Otherwise, quaternary level
316       * is affected only by the number of non-ignorable code points in
317       * the string.
318       *
319       * Identical strength is rarely useful, as it amounts
320       * to codepoints of the NFD form of the string.
321       * \xrefitem stable "Stable" "Stable List" ICU 2.0
322       */
323      UCOL_STRENGTH,
324 #ifndef U_HIDE_DEPRECATED_API
325      /** When turned on, this attribute positions Hiragana before all
326       * non-ignorables on quaternary level. This is a sneaky way to produce JIS
327       * sort order.
328       *
329       * This attribute was an implementation detail of the CLDR Japanese tailoring.
330       * Since ICU 50, this attribute is not settable any more via API functions.
331       * Since CLDR 25/ICU 53, explicit quaternary relations are used
332       * to achieve the same Japanese sort order.
333       *
334       * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
335       */
336      UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1,
337 #endif  /* U_HIDE_DEPRECATED_API */
338      /**
339       * When turned on, this attribute makes
340       * substrings of digits sort according to their numeric values.
341       *
342       * This is a way to get '100' to sort AFTER '2'. Note that the longest
343       * digit substring that can be treated as a single unit is
344       * 254 digits (not counting leading zeros). If a digit substring is
345       * longer than that, the digits beyond the limit will be treated as a
346       * separate digit substring.
347       *
348       * A "digit" in this sense is a code point with General_Category=Nd,
349       * which does not include circled numbers, roman numerals, etc.
350       * Only a contiguous digit substring is considered, that is,
351       * non-negative integers without separators.
352       * There is no support for plus/minus signs, decimals, exponents, etc.
353       * The value is assigned explicitly, so it is the same whether or not UCOL_HIRAGANA_QUATERNARY_MODE is hidden by U_HIDE_DEPRECATED_API.
354       * \xrefitem stable "Stable" "Stable List" ICU 2.8
355       */
356      UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2,
357 
358     /* Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API,
359      * it is needed for layout of RuleBasedCollator object. */
360 #ifndef U_FORCE_HIDE_DEPRECATED_API
361     /**
362      * One more than the highest normal UColAttribute value.
363      * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420.
364      */
365      UCOL_ATTRIBUTE_COUNT
366 #endif  // U_FORCE_HIDE_DEPRECATED_API
367 } UColAttribute;
368 
369 
370 /**
371  * Open a UCollator for comparing strings.
372  *
373  * For some languages, multiple collation types are available;
374  * for example, "de@collation=phonebook".
375  * Starting with ICU 54, collation attributes can be specified via locale keywords as well,
376  * in the old locale extension syntax ("el@colCaseFirst=upper")
377  * or in language tag syntax ("el-u-kf-upper").
378  * See <a href="https://unicode-org.github.io/icu/userguide/collation/api">User Guide: Collation API</a>.
379  *
380  * The UCollator pointer is used in all the calls to the Collation
381  * service. When finished, the collator must be disposed of by calling
382  * {@link #ucol_close }.
383  * @param loc The locale containing the required collation rules.
384  *            Special values for locales can be passed in -
385  *            if NULL is passed for the locale, the default locale
386  *            collation rules will be used. If empty string ("") or
387  *            "root" are passed, the root collator will be returned.
388  * @param status A pointer to a UErrorCode to receive any errors
389  * @return A pointer to a UCollator, or NULL if an error occurred.
390  * @see ucol_openRules
391  * @see ucol_clone
392  * @see ucol_close
393  * \xrefitem stable "Stable" "Stable List" ICU 2.0
394  */
395 U_CAPI UCollator* U_EXPORT2
396 ucol_open(const char *loc, UErrorCode *status) __INTRODUCED_IN(__ANDROID_API_T__);
397 
398 
399 
400 /**
401  * Produce a UCollator instance according to the rules supplied.
402  * The rules are used to change the default ordering, defined in the
403  * UCA in a process called tailoring. The resulting UCollator pointer
404  * can be used in the same way as the one obtained by {@link #ucol_open }.
405  * @param rules A string describing the collation rules. For the syntax
406  *              of the rules please see the User Guide.
407  * @param rulesLength The length of rules, or -1 if null-terminated.
408  * @param normalizationMode The normalization mode: One of
409  *             UCOL_OFF     (expect the text to not need normalization),
410  *             UCOL_ON      (normalize), or
411  *             UCOL_DEFAULT (set the mode according to the rules)
412  * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
413  * UCOL_TERTIARY, UCOL_IDENTICAL, UCOL_DEFAULT_STRENGTH - can be also set in the rules.
414  * @param parseError  A pointer to UParseError to receive information about errors
415  *                    that occurred during parsing. This argument can currently be set
416  *                    to NULL, but at the user's own risk. Please provide a real structure.
417  * @param status A pointer to a UErrorCode to receive any errors
418  * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case
419  *         of error - please use status argument to check for errors.
420  * @see ucol_open
421  * @see ucol_clone
422  * @see ucol_close
423  * \xrefitem stable "Stable" "Stable List" ICU 2.0
424  */
425 U_CAPI UCollator* U_EXPORT2
426 ucol_openRules( const UChar        *rules,
427                 int32_t            rulesLength,
428                 UColAttributeValue normalizationMode,
429                 UCollationStrength strength,
430                 UParseError        *parseError,
431                 UErrorCode         *status) __INTRODUCED_IN(__ANDROID_API_T__);
432 
433 
434 
435 #ifndef U_HIDE_DEPRECATED_API
436 
437 #endif  /* U_HIDE_DEPRECATED_API */
438 
439 #ifndef U_HIDE_DEPRECATED_API
440 
441 #endif  /* U_HIDE_DEPRECATED_API */
442 
443 
444 
445 /**
446  * Close a UCollator.
447  * Once closed, a UCollator should not be used. Every open collator should
448  * be closed. Otherwise, a memory leak will result. In C++, LocalUCollatorPointer closes a collator via this function.
449  * @param coll The UCollator to close.
450  * @see ucol_open
451  * @see ucol_openRules
452  * @see ucol_clone
453  * \xrefitem stable "Stable" "Stable List" ICU 2.0
454  */
455 U_CAPI void U_EXPORT2
456 ucol_close(UCollator *coll) __INTRODUCED_IN(__ANDROID_API_T__);
457 
458 
459 
460 #if U_SHOW_CPLUSPLUS_API
461 
462 U_NAMESPACE_BEGIN
463 
464 /**
465  * \class LocalUCollatorPointer
466  * "Smart pointer" class, closes a UCollator via ucol_close().
467  * For most methods see the LocalPointerBase base class.
468  *
469  * @see LocalPointerBase
470  * @see LocalPointer
471  * \xrefitem stable "Stable" "Stable List" ICU 4.4
472  */
473 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCollatorPointer, UCollator, ucol_close);
474 
475 U_NAMESPACE_END
476 
477 #endif  // U_SHOW_CPLUSPLUS_API
478 
479 /**
480  * Compare two strings.
481  * The strings will be compared using the options already specified.
482  * @param coll The UCollator containing the comparison rules.
483  * @param source The source string.
484  * @param sourceLength The length of source, or -1 if null-terminated.
485  * @param target The target string.
486  * @param targetLength The length of target, or -1 if null-terminated.
487  * @return The result of comparing the strings; one of UCOL_EQUAL,
488  * UCOL_GREATER, UCOL_LESS (see UCollationResult). For char* input see ucol_strcollUTF8.
489  * @see ucol_greater
490  * @see ucol_greaterOrEqual
491  * @see ucol_equal
492  * \xrefitem stable "Stable" "Stable List" ICU 2.0
493  */
494 U_CAPI UCollationResult U_EXPORT2
495 ucol_strcoll(    const    UCollator    *coll,
496         const    UChar        *source,
497         int32_t            sourceLength,
498         const    UChar        *target,
499         int32_t            targetLength) __INTRODUCED_IN(__ANDROID_API_T__);
500 
501 
502 
503 /**
504 * Compare two strings in UTF-8.
505 * The strings will be compared using the options already specified.
506 * Note: When an input string contains a malformed UTF-8 byte sequence,
507 * this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD).
508 * @param coll The UCollator containing the comparison rules.
509 * @param source The source UTF-8 string.
510 * @param sourceLength The length of source, or -1 if null-terminated.
511 * @param target The target UTF-8 string.
512 * @param targetLength The length of target, or -1 if null-terminated.
513 * @param status A pointer to a UErrorCode to receive any errors
514 * @return The result of comparing the strings; one of UCOL_EQUAL,
515 * UCOL_GREATER, UCOL_LESS (see UCollationResult).
516 * @see ucol_greater
517 * @see ucol_greaterOrEqual
518 * @see ucol_equal
519 * \xrefitem stable "Stable" "Stable List" ICU 50
520 */
521 U_CAPI UCollationResult U_EXPORT2
522 ucol_strcollUTF8(
523         const UCollator *coll,
524         const char      *source,
525         int32_t         sourceLength,
526         const char      *target,
527         int32_t         targetLength,
528         UErrorCode      *status) __INTRODUCED_IN(__ANDROID_API_T__);
529 
530 
531 
532 
533 
534 
535 
536 
537 
538 
539 
540 /**
541  * Get the collation strength used in a UCollator.
542  * The strength influences how strings are compared (see UCollationStrength).
543  * @param coll The UCollator to query.
544  * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
545  * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL.
546  * @see ucol_setStrength
547  * \xrefitem stable "Stable" "Stable List" ICU 2.0
548  */
549 U_CAPI UCollationStrength U_EXPORT2
550 ucol_getStrength(const UCollator *coll) __INTRODUCED_IN(__ANDROID_API_T__);
551 
552 
553 
554 /**
555  * Set the collation strength used in a UCollator.
556  * The strength influences how strings are compared (see UCollationStrength).
557  * @param coll The UCollator to set.
558  * @param strength The desired collation strength; one of UCOL_PRIMARY,
559  * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT.
560  * @see ucol_getStrength
561  * \xrefitem stable "Stable" "Stable List" ICU 2.0
562  */
563 U_CAPI void U_EXPORT2
564 ucol_setStrength(UCollator *coll,
565                  UCollationStrength strength) __INTRODUCED_IN(__ANDROID_API_T__);
566 
567 
568 
569 /**
570  * Retrieves the reordering codes for this collator.
571  * These reordering codes are a combination of UScriptCode and UColReorderCode entries.
572  * @param coll The UCollator to query.
573  * @param dest The array to fill with the reordering codes.
574  * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
575  * will only return the length of the result without writing any codes (pre-flighting).
576  * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a
577  * failure before the function call.
578  * @return The number of reordering codes written to the dest array.
579  * @see ucol_setReorderCodes
580  * @see ucol_getEquivalentReorderCodes
581  * @see UScriptCode
582  * @see UColReorderCode
583  * \xrefitem stable "Stable" "Stable List" ICU 4.8
584  */
585 U_CAPI int32_t U_EXPORT2
586 ucol_getReorderCodes(const UCollator* coll,
587                     int32_t* dest,
588                     int32_t destCapacity,
589                     UErrorCode *pErrorCode) __INTRODUCED_IN(__ANDROID_API_T__);
590 
591 
592 /**
593  * Sets the reordering codes for this collator.
594  * Collation reordering allows scripts and some other groups of characters
595  * to be moved relative to each other. This reordering is done on top of
596  * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed
597  * at the start and/or the end of the collation order. These groups are specified using
598  * UScriptCode and UColReorderCode entries.
599  *
600  * <p>By default, reordering codes specified for the start of the order are placed in the
601  * order given after several special non-script blocks. These special groups of characters
602  * are space, punctuation, symbol, currency, and digit. These special groups are represented with
603  * UColReorderCode entries. Script groups can be intermingled with
604  * these special non-script groups if those special groups are explicitly specified in the reordering.
605  *
606  * <p>The special code OTHERS stands for any script that is not explicitly
607  * mentioned in the list of reordering codes given. Anything that is after OTHERS
608  * will go at the very end of the reordering in the order given.
609  *
610  * <p>The special reorder code DEFAULT will reset the reordering for this collator
611  * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
612  * was specified when this collator was created from resource data or from rules. The
613  * DEFAULT code <b>must</b> be the sole code supplied when it is used.
614  * If not, then U_ILLEGAL_ARGUMENT_ERROR will be set.
615  *
616  * <p>The special reorder code NONE will remove any reordering for this collator.
617  * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The
618  * NONE code <b>must</b> be the sole code supplied when it is used.
619  *
620  * @param coll The UCollator to set.
621  * @param reorderCodes An array of reordering codes (UScriptCode and UColReorderCode values) in the new order. This can be NULL if the
622  * length is also set to 0. An empty array will clear any reordering codes on the collator.
623  * @param reorderCodesLength The length of reorderCodes.
624  * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a
625  * failure before the function call.
626  * @see ucol_getReorderCodes
627  * @see ucol_getEquivalentReorderCodes
628  * @see UScriptCode
629  * @see UColReorderCode
630  * \xrefitem stable "Stable" "Stable List" ICU 4.8
631  */
632 U_CAPI void U_EXPORT2
633 ucol_setReorderCodes(UCollator* coll,
634                     const int32_t* reorderCodes,
635                     int32_t reorderCodesLength,
636                     UErrorCode *pErrorCode) __INTRODUCED_IN(__ANDROID_API_T__);
637 
638 
639 
640 /**
641  * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
642  * codes will be grouped and must reorder together.
643  * Beginning with ICU 55, scripts only reorder together if they are primary-equal,
644  * for example Hiragana and Katakana.
645  *
646  * @param reorderCode The reorder code to determine equivalence for.
647  * @param dest The array to fill with the equivalent reorder codes.
648  * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
649  * will only return the length of the result without writing any codes (pre-flighting).
650  * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate
651  * a failure before the function call.
652  * @return The number of reordering codes written to the dest array.
653  * @see ucol_setReorderCodes
654  * @see ucol_getReorderCodes
655  * @see UScriptCode
656  * @see UColReorderCode
657  * \xrefitem stable "Stable" "Stable List" ICU 4.8
658  */
659 U_CAPI int32_t U_EXPORT2
660 ucol_getEquivalentReorderCodes(int32_t reorderCode,
661                     int32_t* dest,
662                     int32_t destCapacity,
663                     UErrorCode *pErrorCode) __INTRODUCED_IN(__ANDROID_API_T__);
664 
665 
666 
667 /**
668  * Get the display name for a UCollator.
669  * The display name is suitable for presentation to a user.
670  * @param objLoc The locale of the collator in question.
671  * @param dispLoc The locale for display.
672  * @param result A pointer to a buffer to receive the display name.
673  * @param resultLength The maximum size of result.
674  * @param status A pointer to a UErrorCode to receive any errors
675  * @return The total buffer size needed; if greater than resultLength,
676  * the output was truncated.
677  * \xrefitem stable "Stable" "Stable List" ICU 2.0
678  */
679 U_CAPI int32_t U_EXPORT2
680 ucol_getDisplayName(    const    char        *objLoc,
681             const    char        *dispLoc,
682             UChar             *result,
683             int32_t         resultLength,
684             UErrorCode        *status) __INTRODUCED_IN(__ANDROID_API_T__);
685 
686 
687 
688 /**
689  * Get a locale for which collation rules are available.
690  * A UCollator in a locale returned by this function will perform the correct
691  * collation for the locale.
692  * @param localeIndex The index of the desired locale.
693  * @return A locale for which collation rules are available, or NULL if none.
694  * @see ucol_countAvailable
695  * \xrefitem stable "Stable" "Stable List" ICU 2.0
696  */
697 U_CAPI const char* U_EXPORT2
698 ucol_getAvailable(int32_t localeIndex) __INTRODUCED_IN(__ANDROID_API_T__);
699 
700 
701 
702 /**
703  * Determine how many locales have collation rules available.
704  * This function is most useful for determining the loop ending condition for
705  * calls to {@link #ucol_getAvailable }.
706  * @return The number of locales for which collation rules are available.
707  * @see ucol_getAvailable
708  * \xrefitem stable "Stable" "Stable List" ICU 2.0
709  */
710 U_CAPI int32_t U_EXPORT2
711 ucol_countAvailable(void) __INTRODUCED_IN(__ANDROID_API_T__);
712 
713 
714 
715 #if !UCONFIG_NO_SERVICE
716 /**
717  * Create a string enumerator of all locales for which a valid
718  * collator may be opened.
719  * @param status input-output error code
720  * @return a string enumeration over locale strings. The caller is
721  * responsible for closing the result.
722  * \xrefitem stable "Stable" "Stable List" ICU 3.0
723  */
724 U_CAPI UEnumeration* U_EXPORT2
725 ucol_openAvailableLocales(UErrorCode *status) __INTRODUCED_IN(__ANDROID_API_T__);
726 
727 
728 #endif  /* !UCONFIG_NO_SERVICE */
729 
730 /**
731  * Create a string enumerator of all possible keywords that are relevant to
732  * collation. At this point, the only recognized keyword for this
733  * service is "collation".
734  * @param status input-output error code
735  * @return a string enumeration over locale strings. The caller is
736  * responsible for closing the result.
737  * \xrefitem stable "Stable" "Stable List" ICU 3.0
738  */
739 U_CAPI UEnumeration* U_EXPORT2
740 ucol_getKeywords(UErrorCode *status) __INTRODUCED_IN(__ANDROID_API_T__);
741 
742 
743 
744 /**
745  * Given a keyword, create a string enumeration of all values
746  * for that keyword that are currently in use.
747  * @param keyword a particular keyword as enumerated by
748  * ucol_getKeywords. If any other keyword is passed in, *status is set
749  * to U_ILLEGAL_ARGUMENT_ERROR.
750  * @param status input-output error code
751  * @return a string enumeration over collation keyword values, or NULL
752  * upon error. The caller is responsible for closing the result.
753  * \xrefitem stable "Stable" "Stable List" ICU 3.0
754  */
755 U_CAPI UEnumeration* U_EXPORT2
756 ucol_getKeywordValues(const char *keyword, UErrorCode *status) __INTRODUCED_IN(__ANDROID_API_T__);
757 
758 
759 
760 /**
761  * Given a key and a locale, returns an array of string values in a preferred
762  * order that would make a difference. These are all and only those values where
763  * the open (creation) of the service with the locale formed from the input locale
764  * plus input keyword and that value has different behavior than creation with the
765  * input locale alone.
766  * @param key           one of the keys supported by this service.  For now, only
767  *                      "collation" is supported.
768  * @param locale        the locale
769  * @param commonlyUsed  if set to true it will return only commonly used values
770  *                      with the given locale in preferred order.  Otherwise,
771  *                      it will return all the available values for the locale.
772  * @param status error status
773  * @return a string enumeration over keyword values for the given key and the locale.
774  * \xrefitem stable "Stable" "Stable List" ICU 4.2
775  */
776 U_CAPI UEnumeration* U_EXPORT2
777 ucol_getKeywordValuesForLocale(const char* key,
778                                const char* locale,
779                                UBool commonlyUsed,
780                                UErrorCode* status) __INTRODUCED_IN(__ANDROID_API_T__);
781 
782 
783 
784 /**
785  * Return the functionally equivalent locale for the specified
786  * input locale, with respect to given keyword, for the
787  * collation service. If two different input locale + keyword
788  * combinations produce the same result locale, then collators
789  * instantiated for these two different input locales will behave
790  * equivalently. The converse is not always true; two collators
791  * may in fact be equivalent, but return different results, due to
792  * internal details. The return result has no other meaning than
793  * that stated above, and implies nothing as to the relationship
794  * between the two locales. This is intended for use by
795  * applications who wish to cache collators, or otherwise reuse
796  * collators when possible. The functional equivalent may change
797  * over time. For more information, please see the <a
798  * href="https://unicode-org.github.io/icu/userguide/locale#locales-and-services">
799  * Locales and Services</a> section of the ICU User Guide.
800  * @param result fillin for the functionally equivalent result locale
801  * @param resultCapacity capacity of the fillin buffer
802  * @param keyword a particular keyword as enumerated by
803  * ucol_getKeywords.
804  * @param locale the specified input locale
805  * @param isAvailable if non-NULL, pointer to a fillin parameter that
806  * on return indicates whether the specified input locale was 'available'
807  * to the collation service. A locale is defined as 'available' if it
808  * physically exists within the collation locale data.
809  * @param status pointer to input-output error code
810  * @return the actual buffer size needed for the locale. If greater
811  * than resultCapacity, the returned full name will be truncated and
812  * an error code will be returned.
813  * \xrefitem stable "Stable" "Stable List" ICU 3.0
814  */
815 U_CAPI int32_t U_EXPORT2
816 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
817                              const char* keyword, const char* locale,
818                              UBool* isAvailable, UErrorCode* status) __INTRODUCED_IN(__ANDROID_API_T__);
819 
820 
821 
822 
823 
824 #ifndef U_HIDE_DEPRECATED_API
825 
826 
827 
828 #endif  /* U_HIDE_DEPRECATED_API */
829 
830 
831 /**
832  * Get a sort key for a string from a UCollator.
833  * Sort keys may be compared using <TT>strcmp</TT>.
834  *
835  * Note that sort keys are often less efficient than simply doing comparison.
836  * For more details, see the ICU User Guide.
837  *
838  * Like ICU functions that write to an output buffer, the buffer contents
839  * is undefined if the buffer capacity (resultLength parameter) is too small.
840  * Unlike ICU functions that write a string to an output buffer,
841  * the terminating zero byte is counted in the sort key length.
842  * @param coll The UCollator containing the collation rules.
843  * @param source The string to transform.
844  * @param sourceLength The length of source, or -1 if null-terminated.
845  * @param result A pointer to a buffer to receive the attribute.
846  * @param resultLength The maximum size of result.
847  * @return The size needed to fully store the sort key.
848  *      If there was an internal error generating the sort key,
849  *      a zero value is returned.
850  * @see ucol_keyHashCode
851  * \xrefitem stable "Stable" "Stable List" ICU 2.0
852  */
853 U_CAPI int32_t U_EXPORT2
854 ucol_getSortKey(const    UCollator    *coll,
855         const    UChar        *source,
856         int32_t        sourceLength,
857         uint8_t        *result,
858         int32_t        resultLength) __INTRODUCED_IN(__ANDROID_API_T__);
859 
860 
861 
862 
863 
864 
/*
 * NOTE(review): the doc comment below used to say "see below for
 * explanation", but no ucol_getBound declaration follows in this header;
 * confirm where that explanation lives.
 * NOTE(review): the `: int32_t` fixed-underlying-type syntax is standard in
 * C++11 and C23; older C language modes accept it only as a compiler
 * extension.
 */
/**
 * Enum that is taken by the ucol_getBound API.
 * Do not change the values assigned to the members of this enum:
 * the underlying code depends on them having these numbers.
 * The underlying type is fixed to int32_t.
 * \xrefitem stable "Stable" "Stable List" ICU 2.0
 */
typedef enum UColBoundMode : int32_t {
  /** lower bound */
  UCOL_BOUND_LOWER = 0,
  /** upper bound that will match strings of exact size */
  UCOL_BOUND_UPPER = 1,
  /** upper bound that will match all the strings that have the same initial substring as the given string */
  UCOL_BOUND_UPPER_LONG = 2,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UColBoundMode value.
     * Only declared when U_HIDE_DEPRECATED_API is not set.
     * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    UCOL_BOUND_VALUE_COUNT
#endif  /* U_HIDE_DEPRECATED_API */
} UColBoundMode;
887 
888 
889 
890 
891 
892 
893 
894 /**
895  * Merges two sort keys. The levels are merged with their corresponding counterparts
896  * (primaries with primaries, secondaries with secondaries etc.). Between the values
897  * from the same level a separator is inserted.
898  *
899  * This is useful, for example, for combining sort keys from first and last names
900  * to sort such pairs.
901  * See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys
902  *
903  * The recommended way to achieve "merged" sorting is by
904  * concatenating strings with U+FFFE between them.
905  * The concatenation has the same sort order as the merged sort keys,
906  * but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\\uFFFE' + str2).
907  * Using strings with U+FFFE may yield shorter sort keys.
908  *
909  * For details about Sort Key Features see
910  * https://unicode-org.github.io/icu/userguide/collation/api#sort-key-features
911  *
912  * It is possible to merge multiple sort keys by consecutively merging
913  * another one with the intermediate result.
914  *
915  * The length of the merge result is the sum of the lengths of the input sort keys.
916  *
917  * Example (uncompressed):
918  * <pre>191B1D 01 050505 01 910505 00
919  * 1F2123 01 050505 01 910505 00</pre>
920  * will be merged as
921  * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre>
922  *
923  * If the destination buffer is not big enough, then its contents are undefined.
924  * If any of source lengths are zero or any of the source pointers are NULL/undefined,
925  * the result is of size zero.
926  *
927  * @param src1 the first sort key
928  * @param src1Length the length of the first sort key, including the zero byte at the end;
929  *        can be -1 if the function is to find the length
930  * @param src2 the second sort key
931  * @param src2Length the length of the second sort key, including the zero byte at the end;
932  *        can be -1 if the function is to find the length
933  * @param dest the buffer where the merged sort key is written,
934  *        can be NULL if destCapacity==0
935  * @param destCapacity the number of bytes in the dest buffer
936  * @return the length of the merged sort key, src1Length+src2Length;
937  *         can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments),
938  *         in which cases the contents of dest is undefined
939  * \xrefitem stable "Stable" "Stable List" ICU 2.0
940  */
941 U_CAPI int32_t U_EXPORT2
942 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
943                    const uint8_t *src2, int32_t src2Length,
944                    uint8_t *dest, int32_t destCapacity) __INTRODUCED_IN(__ANDROID_API_T__);
945 
946 
947 
948 /**
949  * Universal attribute setter
950  * @param coll collator which attributes are to be changed
951  * @param attr attribute type
952  * @param value attribute value
953  * @param status to indicate whether the operation went on smoothly or there were errors
954  * @see UColAttribute
955  * @see UColAttributeValue
956  * @see ucol_getAttribute
957  * \xrefitem stable "Stable" "Stable List" ICU 2.0
958  */
959 U_CAPI void U_EXPORT2
960 ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status) __INTRODUCED_IN(__ANDROID_API_T__);
961 
962 
963 
964 /**
965  * Universal attribute getter
966  * @param coll collator which attributes are to be changed
967  * @param attr attribute type
968  * @return attribute value
969  * @param status to indicate whether the operation went on smoothly or there were errors
970  * @see UColAttribute
971  * @see UColAttributeValue
972  * @see ucol_setAttribute
973  * \xrefitem stable "Stable" "Stable List" ICU 2.0
974  */
975 U_CAPI UColAttributeValue  U_EXPORT2
976 ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status) __INTRODUCED_IN(__ANDROID_API_T__);
977 
978 
979 
980 /**
981  * Sets the variable top to the top of the specified reordering group.
982  * The variable top determines the highest-sorting character
983  * which is affected by UCOL_ALTERNATE_HANDLING.
984  * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
985  * @param coll the collator
986  * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION,
987  *              UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY;
988  *              or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group
989  * @param pErrorCode Standard ICU error code. Its input value must
990  *                   pass the U_SUCCESS() test, or else the function returns
991  *                   immediately. Check for U_FAILURE() on output or use with
992  *                   function chaining. (See User Guide for details.)
993  * @see ucol_getMaxVariable
994  * \xrefitem stable "Stable" "Stable List" ICU 53
995  */
996 U_CAPI void U_EXPORT2
997 ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode) __INTRODUCED_IN(__ANDROID_API_T__);
998 
999 
1000 
1001 /**
1002  * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
1003  * @param coll the collator
1004  * @return the maximum variable reordering group.
1005  * @see ucol_setMaxVariable
1006  * \xrefitem stable "Stable" "Stable List" ICU 53
1007  */
1008 U_CAPI UColReorderCode U_EXPORT2
1009 ucol_getMaxVariable(const UCollator *coll) __INTRODUCED_IN(__ANDROID_API_T__);
1010 
1011 
1012 
1013 #ifndef U_HIDE_DEPRECATED_API
1014 
1015 #endif  /* U_HIDE_DEPRECATED_API */
1016 
1017 
1018 
1019 #ifndef U_HIDE_DEPRECATED_API
1020 
1021 #endif  /* U_HIDE_DEPRECATED_API */
1022 
1023 /**
1024  * Thread safe cloning operation. The result is a clone of a given collator.
1025  * @param coll collator to be cloned
1026  * @param status to indicate whether the operation went on smoothly or there were errors
1027  * @return pointer to the new clone
1028  * @see ucol_open
1029  * @see ucol_openRules
1030  * @see ucol_close
1031  * \xrefitem stable "Stable" "Stable List" ICU 71
1032  */
1033 U_CAPI UCollator* U_EXPORT2 ucol_clone(const UCollator *coll, UErrorCode *status) __INTRODUCED_IN(__ANDROID_API_T__);
1034 
1035 
1036 
1037 #ifndef U_HIDE_DEPRECATED_API
1038 
1039 
1040 
1041 
/**
 * Default memory size for the new clone.
 * Only defined when U_HIDE_DEPRECATED_API is not set.
 * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 52. Do not rely on ucol_safeClone() cloning into any provided buffer.
 */
#define U_COL_SAFECLONE_BUFFERSIZE 1
1046 
1047 #endif /* U_HIDE_DEPRECATED_API */
1048 
1049 
1050 
1051 #ifndef U_HIDE_DEPRECATED_API
1052 
1053 #endif  /* U_HIDE_DEPRECATED_API */
1054 
1055 
1056 
1057 
1058 
1059 #ifndef U_HIDE_INTERNAL_API
1060 
1061 
1062 
1063 #endif  /* U_HIDE_INTERNAL_API */
1064 
1065 
1066 
1067 
1068 
1069 
1070 #endif /* #if !UCONFIG_NO_COLLATION */
1071 
1072 #endif
1073 
1074 /** @} */ // addtogroup
1075