xref: /aosp_15_r20/external/icu/libandroidicu/include/unicode/ucnvsel.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2008-2011, International Business Machines
7 *   Corporation, Google and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 */
11 /*
12  * Author : eldawy@google.com (Mohamed Eldawy)
13  * ucnvsel.h
14  *
15  * Purpose: To generate a list of encodings capable of handling
16  * a given Unicode text
17  *
18  * Started 09-April-2008
19  */
20 
21 #ifndef __ICU_UCNV_SEL_H__
22 #define __ICU_UCNV_SEL_H__
23 
24 #include "unicode/utypes.h"
25 
26 #if !UCONFIG_NO_CONVERSION
27 
28 #include "unicode/uset.h"
29 #include "unicode/utf16.h"
30 #include "unicode/uenum.h"
31 #include "unicode/ucnv.h"
32 
33 #if U_SHOW_CPLUSPLUS_API
34 #include "unicode/localpointer.h"
35 #endif   // U_SHOW_CPLUSPLUS_API
36 
37 /**
38  * \file
39  * \brief C API: Encoding/charset selector
40  *
41  * A converter selector is built with a set of encoding/charset names
42  * and given an input string returns the set of names of the
43  * corresponding converters which can convert the string.
44  *
45  * A converter selector can be serialized into a buffer and reopened
46  * from the serialized form.
47  */
48 
49 struct UConverterSelector;
50 /**
51  * @{
52  * Typedef for the selector data structure, which this header only forward-declares.
53  */
54 typedef struct UConverterSelector UConverterSelector;
55 /** @} */
56 
57 /**
58  * Open a selector.
59  * If converterListSize is 0, build for all available converters.
60  * If excludedCodePoints is NULL, don't exclude any code points.
61  *
62  * @param converterList an array of the names of the encodings to consider.
63  *                      Can be NULL if converterListSize==0.
64  *                      The list and the names will be cloned, and the caller
65  *                      retains ownership of the original.
66  * @param converterListSize number of encoding names in converterList.
67  *                          If 0, builds a selector for all available converters.
68  * @param excludedCodePoints a set of code points to be excluded from consideration.
69  *                           That is, excluded code points in a string do not change
70  *                           the selection result. (They might be handled by a callback.)
71  *                           Use NULL to exclude nothing.
72  * @param whichSet which converter set to use. This determines whether
73  *                 only roundtrip mappings or also fallbacks are considered.
74  * @param status an in/out ICU UErrorCode
75  * @return the new selector
76  *
77  * @stable ICU 4.2
78  */
79 U_CAPI UConverterSelector* U_EXPORT2
80 ucnvsel_open(const char* const*  converterList, int32_t converterListSize,
81              const USet* excludedCodePoints,
82              const UConverterUnicodeSet whichSet, UErrorCode* status);
83 
84 /**
85  * Closes a selector.
86  * If any enumerations were returned by ucnvsel_selectForString() or
87  * ucnvsel_selectForUTF8(), they become invalid. They can be closed before or after
88  * calling ucnvsel_close(), but should never be used after the selector is closed.
89  *
90  * @see ucnvsel_selectForString
91  * @see ucnvsel_selectForUTF8
92  *
93  * @param sel selector to close
94  *
95  * @stable ICU 4.2
96  */
97 U_CAPI void U_EXPORT2
98 ucnvsel_close(UConverterSelector *sel);
99 
100 #if U_SHOW_CPLUSPLUS_API
101 
102 U_NAMESPACE_BEGIN
103 
104 /**
105  * \class LocalUConverterSelectorPointer
106  * "Smart pointer" class that closes a UConverterSelector via ucnvsel_close().
107  * For most methods see the LocalPointerBase base class.
108  *
109  * @see LocalPointerBase
110  * @see LocalPointer
111  * @stable ICU 4.4
112  */
113 U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);
114 
115 U_NAMESPACE_END
116 
117 #endif   // U_SHOW_CPLUSPLUS_API
118 
119 /**
120  * Open a selector from its serialized form.
121  * The buffer must remain valid and unchanged for the lifetime of the selector.
122  * This is much faster than creating a selector from scratch.
123  * Using a serialized form from a different machine (endianness/charset) is supported.
124  *
125  * @param buffer pointer to the serialized form of a converter selector
126  *               (see ucnvsel_serialize()); must be 32-bit-aligned
127  * @param length the capacity of the buffer (can be equal to or larger than
128  *               the actual data length)
129  * @param status an in/out ICU UErrorCode
130  * @return the new selector
131  *
132  * @stable ICU 4.2
133  */
134 U_CAPI UConverterSelector* U_EXPORT2
135 ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
136 
137 /**
138  * Serialize a selector into a linear buffer.
139  * The serialized form is portable to different machines.
140  *
141  * @param sel selector to serialize
142  * @param buffer pointer to 32-bit-aligned memory to be filled with the
143  *               serialized form of this converter selector
144  * @param bufferCapacity the capacity of this buffer
145  * @param status an in/out ICU UErrorCode
146  * @return the buffer capacity required to hold the serialized data (returned even
147  *         if the call fails with a U_BUFFER_OVERFLOW_ERROR)
148  *
149  * @stable ICU 4.2
150  */
151 U_CAPI int32_t U_EXPORT2
152 ucnvsel_serialize(const UConverterSelector* sel,
153                   void* buffer, int32_t bufferCapacity, UErrorCode* status);
154 
155 /**
156  * Select converters that can map all characters in a UTF-16 string,
157  * ignoring the excluded code points.
158  *
159  * @param sel a selector
160  * @param s UTF-16 string
161  * @param length length of the string, or -1 if NUL-terminated
162  * @param status an in/out ICU UErrorCode
163  * @return an enumeration containing encoding names; it must not be used after
164  *         sel is closed. The returned encoding names and their order will be
165  *         the same as supplied when building the selector.
166  *
167  * @stable ICU 4.2
168  */
169 U_CAPI UEnumeration * U_EXPORT2
170 ucnvsel_selectForString(const UConverterSelector* sel,
171                         const UChar *s, int32_t length, UErrorCode *status);
172 
173 /**
174  * Select converters that can map all characters in a UTF-8 string,
175  * ignoring the excluded code points.
176  *
177  * @param sel a selector
178  * @param s UTF-8 string
179  * @param length length of the string, or -1 if NUL-terminated
180  * @param status an in/out ICU UErrorCode
181  * @return an enumeration containing encoding names; it must not be used after
182  *         sel is closed. The returned encoding names and their order will be
183  *         the same as supplied when building the selector.
184  *
185  * @stable ICU 4.2
186  */
187 U_CAPI UEnumeration * U_EXPORT2
188 ucnvsel_selectForUTF8(const UConverterSelector* sel,
189                       const char *s, int32_t length, UErrorCode *status);
190 
191 #endif  /* !UCONFIG_NO_CONVERSION */
192 
193 #endif  /* __ICU_UCNV_SEL_H__ */
194