xref: /aosp_15_r20/external/icu/icu4c/source/test/intltest/colldata.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker  ******************************************************************************
5*0e209d39SAndroid Build Coastguard Worker  *   Copyright (C) 1996-2012, International Business Machines                 *
6*0e209d39SAndroid Build Coastguard Worker  *   Corporation and others.  All Rights Reserved.                            *
7*0e209d39SAndroid Build Coastguard Worker  ******************************************************************************
8*0e209d39SAndroid Build Coastguard Worker  */
9*0e209d39SAndroid Build Coastguard Worker 
10*0e209d39SAndroid Build Coastguard Worker /**
11*0e209d39SAndroid Build Coastguard Worker  * \file
12*0e209d39SAndroid Build Coastguard Worker  * \brief Originally, added as C++ API for Collation data used to compute minLengthInChars
13*0e209d39SAndroid Build Coastguard Worker  * \internal
14*0e209d39SAndroid Build Coastguard Worker  */
15*0e209d39SAndroid Build Coastguard Worker 
16*0e209d39SAndroid Build Coastguard Worker /*
17*0e209d39SAndroid Build Coastguard Worker  * Note: This module was included in ICU 4.0.1 as @internal technology preview for supporting
18*0e209d39SAndroid Build Coastguard Worker  * Boyer-Moore string search API. For now, only SSearchTest depends on this module.
19*0e209d39SAndroid Build Coastguard Worker  * I temporarily moved the module from i18n directory to intltest, because we have no plan to
20*0e209d39SAndroid Build Coastguard Worker  * publish this as public API. (2012-12-18 yoshito)
21*0e209d39SAndroid Build Coastguard Worker  */
22*0e209d39SAndroid Build Coastguard Worker 
23*0e209d39SAndroid Build Coastguard Worker #ifndef COLL_DATA_H
24*0e209d39SAndroid Build Coastguard Worker #define COLL_DATA_H
25*0e209d39SAndroid Build Coastguard Worker 
26*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
27*0e209d39SAndroid Build Coastguard Worker 
28*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_COLLATION
29*0e209d39SAndroid Build Coastguard Worker 
30*0e209d39SAndroid Build Coastguard Worker #include "unicode/ucol.h"
31*0e209d39SAndroid Build Coastguard Worker #include "unicode/unistr.h"
32*0e209d39SAndroid Build Coastguard Worker 
/**
 * The size, in CEs, of the internal fixed-size CE buffer
 * (<code>ceBuffer</code>) embedded in every <code>CEList</code> object.
 */
#define CELIST_BUFFER_SIZE 4
37*0e209d39SAndroid Build Coastguard Worker 
38*0e209d39SAndroid Build Coastguard Worker /**
39*0e209d39SAndroid Build Coastguard Worker  * \def INSTRUMENT_CELIST
40*0e209d39SAndroid Build Coastguard Worker  * Define this to enable the <code>CEList</code> objects to collect
41*0e209d39SAndroid Build Coastguard Worker  * statistics.
42*0e209d39SAndroid Build Coastguard Worker  */
43*0e209d39SAndroid Build Coastguard Worker 
/**
 * The size of the initial list in a <code>StringList</code> object.
 */
#define STRING_LIST_BUFFER_SIZE 16
48*0e209d39SAndroid Build Coastguard Worker 
49*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_USE
50*0e209d39SAndroid Build Coastguard Worker 
51*0e209d39SAndroid Build Coastguard Worker  /**
52*0e209d39SAndroid Build Coastguard Worker   * This object holds a list of CEs generated from a particular
53*0e209d39SAndroid Build Coastguard Worker   * <code>UnicodeString</code>
54*0e209d39SAndroid Build Coastguard Worker   *
55*0e209d39SAndroid Build Coastguard Worker   */
56*0e209d39SAndroid Build Coastguard Worker class CEList
57*0e209d39SAndroid Build Coastguard Worker {
58*0e209d39SAndroid Build Coastguard Worker public:
59*0e209d39SAndroid Build Coastguard Worker     /**
60*0e209d39SAndroid Build Coastguard Worker      * Construct a <code>CEList</code> object.
61*0e209d39SAndroid Build Coastguard Worker      *
62*0e209d39SAndroid Build Coastguard Worker      * @param coll - the Collator used to collect the CEs.
63*0e209d39SAndroid Build Coastguard Worker      * @param string - the string for which to collect the CEs.
64*0e209d39SAndroid Build Coastguard Worker      * @param status - will be set if any errors occur.
65*0e209d39SAndroid Build Coastguard Worker      *
66*0e209d39SAndroid Build Coastguard Worker      * Note: if on return, status is set to an error code,
67*0e209d39SAndroid Build Coastguard Worker      * the only safe thing to do with this object is to call
68*0e209d39SAndroid Build Coastguard Worker      * the destructor.
69*0e209d39SAndroid Build Coastguard Worker      */
70*0e209d39SAndroid Build Coastguard Worker     CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status);
71*0e209d39SAndroid Build Coastguard Worker 
72*0e209d39SAndroid Build Coastguard Worker     /**
73*0e209d39SAndroid Build Coastguard Worker      * The destructor.
74*0e209d39SAndroid Build Coastguard Worker      */
75*0e209d39SAndroid Build Coastguard Worker     ~CEList();
76*0e209d39SAndroid Build Coastguard Worker 
77*0e209d39SAndroid Build Coastguard Worker     /**
78*0e209d39SAndroid Build Coastguard Worker      * Return the number of CEs in the list.
79*0e209d39SAndroid Build Coastguard Worker      *
80*0e209d39SAndroid Build Coastguard Worker      * @return the number of CEs in the list.
81*0e209d39SAndroid Build Coastguard Worker      */
82*0e209d39SAndroid Build Coastguard Worker     int32_t size() const;
83*0e209d39SAndroid Build Coastguard Worker 
84*0e209d39SAndroid Build Coastguard Worker     /**
85*0e209d39SAndroid Build Coastguard Worker      * Get a particular CE from the list.
86*0e209d39SAndroid Build Coastguard Worker      *
87*0e209d39SAndroid Build Coastguard Worker      * @param index - the index of the CE to return
88*0e209d39SAndroid Build Coastguard Worker      *
89*0e209d39SAndroid Build Coastguard Worker      * @return the CE, or <code>0</code> if <code>index</code> is out of range
90*0e209d39SAndroid Build Coastguard Worker      */
91*0e209d39SAndroid Build Coastguard Worker     uint32_t get(int32_t index) const;
92*0e209d39SAndroid Build Coastguard Worker 
93*0e209d39SAndroid Build Coastguard Worker     /**
94*0e209d39SAndroid Build Coastguard Worker      * Check if the CEs in another <code>CEList</code> match the
95*0e209d39SAndroid Build Coastguard Worker      * suffix of this list starting at a give offset.
96*0e209d39SAndroid Build Coastguard Worker      *
97*0e209d39SAndroid Build Coastguard Worker      * @param offset - the offset of the suffix
98*0e209d39SAndroid Build Coastguard Worker      * @param other - the other <code>CEList</code>
99*0e209d39SAndroid Build Coastguard Worker      *
100*0e209d39SAndroid Build Coastguard Worker      * @return <code>true</code> if the CEs match, <code>false</code> otherwise.
101*0e209d39SAndroid Build Coastguard Worker      */
102*0e209d39SAndroid Build Coastguard Worker     UBool matchesAt(int32_t offset, const CEList *other) const;
103*0e209d39SAndroid Build Coastguard Worker 
104*0e209d39SAndroid Build Coastguard Worker     /**
105*0e209d39SAndroid Build Coastguard Worker      * The index operator.
106*0e209d39SAndroid Build Coastguard Worker      *
107*0e209d39SAndroid Build Coastguard Worker      * @param index - the index
108*0e209d39SAndroid Build Coastguard Worker      *
109*0e209d39SAndroid Build Coastguard Worker      * @return a reference to the given CE in the list
110*0e209d39SAndroid Build Coastguard Worker      */
111*0e209d39SAndroid Build Coastguard Worker     uint32_t &operator[](int32_t index) const;
112*0e209d39SAndroid Build Coastguard Worker 
113*0e209d39SAndroid Build Coastguard Worker private:
114*0e209d39SAndroid Build Coastguard Worker     void add(uint32_t ce, UErrorCode &status);
115*0e209d39SAndroid Build Coastguard Worker 
116*0e209d39SAndroid Build Coastguard Worker     uint32_t ceBuffer[CELIST_BUFFER_SIZE];
117*0e209d39SAndroid Build Coastguard Worker     uint32_t *ces;
118*0e209d39SAndroid Build Coastguard Worker     int32_t listMax;
119*0e209d39SAndroid Build Coastguard Worker     int32_t listSize;
120*0e209d39SAndroid Build Coastguard Worker };
121*0e209d39SAndroid Build Coastguard Worker 
122*0e209d39SAndroid Build Coastguard Worker /**
123*0e209d39SAndroid Build Coastguard Worker  * StringList
124*0e209d39SAndroid Build Coastguard Worker  *
125*0e209d39SAndroid Build Coastguard Worker  * This object holds a list of <code>UnicodeString</code> objects.
126*0e209d39SAndroid Build Coastguard Worker  */
127*0e209d39SAndroid Build Coastguard Worker class StringList
128*0e209d39SAndroid Build Coastguard Worker {
129*0e209d39SAndroid Build Coastguard Worker public:
130*0e209d39SAndroid Build Coastguard Worker     /**
131*0e209d39SAndroid Build Coastguard Worker      * Construct an empty <code>StringList</code>
132*0e209d39SAndroid Build Coastguard Worker      *
133*0e209d39SAndroid Build Coastguard Worker      * @param status - will be set if any errors occur.
134*0e209d39SAndroid Build Coastguard Worker      *
135*0e209d39SAndroid Build Coastguard Worker      * Note: if on return, status is set to an error code,
136*0e209d39SAndroid Build Coastguard Worker      * the only safe thing to do with this object is to call
137*0e209d39SAndroid Build Coastguard Worker      * the destructor.
138*0e209d39SAndroid Build Coastguard Worker      */
139*0e209d39SAndroid Build Coastguard Worker     StringList(UErrorCode &status);
140*0e209d39SAndroid Build Coastguard Worker 
141*0e209d39SAndroid Build Coastguard Worker     /**
142*0e209d39SAndroid Build Coastguard Worker      * The destructor.
143*0e209d39SAndroid Build Coastguard Worker      */
144*0e209d39SAndroid Build Coastguard Worker     ~StringList();
145*0e209d39SAndroid Build Coastguard Worker 
146*0e209d39SAndroid Build Coastguard Worker     /**
147*0e209d39SAndroid Build Coastguard Worker      * Add a string to the list.
148*0e209d39SAndroid Build Coastguard Worker      *
149*0e209d39SAndroid Build Coastguard Worker      * @param string - the string to add
150*0e209d39SAndroid Build Coastguard Worker      * @param status - will be set if any errors occur.
151*0e209d39SAndroid Build Coastguard Worker      */
152*0e209d39SAndroid Build Coastguard Worker     void add(const UnicodeString *string, UErrorCode &status);
153*0e209d39SAndroid Build Coastguard Worker 
154*0e209d39SAndroid Build Coastguard Worker     /**
155*0e209d39SAndroid Build Coastguard Worker      * Add an array of Unicode code points to the list.
156*0e209d39SAndroid Build Coastguard Worker      *
157*0e209d39SAndroid Build Coastguard Worker      * @param chars - the address of the array of code points
158*0e209d39SAndroid Build Coastguard Worker      * @param count - the number of code points in the array
159*0e209d39SAndroid Build Coastguard Worker      * @param status - will be set if any errors occur.
160*0e209d39SAndroid Build Coastguard Worker      */
161*0e209d39SAndroid Build Coastguard Worker     void add(const char16_t *chars, int32_t count, UErrorCode &status);
162*0e209d39SAndroid Build Coastguard Worker 
163*0e209d39SAndroid Build Coastguard Worker     /**
164*0e209d39SAndroid Build Coastguard Worker      * Get a particular string from the list.
165*0e209d39SAndroid Build Coastguard Worker      *
166*0e209d39SAndroid Build Coastguard Worker      * @param index - the index of the string
167*0e209d39SAndroid Build Coastguard Worker      *
168*0e209d39SAndroid Build Coastguard Worker      * @return a pointer to the <code>UnicodeString</code> or <code>nullptr</code>
169*0e209d39SAndroid Build Coastguard Worker      *         if <code>index</code> is out of bounds.
170*0e209d39SAndroid Build Coastguard Worker      */
171*0e209d39SAndroid Build Coastguard Worker     const UnicodeString *get(int32_t index) const;
172*0e209d39SAndroid Build Coastguard Worker 
173*0e209d39SAndroid Build Coastguard Worker     /**
174*0e209d39SAndroid Build Coastguard Worker      * Get the number of strings in the list.
175*0e209d39SAndroid Build Coastguard Worker      *
176*0e209d39SAndroid Build Coastguard Worker      * @return the number of strings in the list.
177*0e209d39SAndroid Build Coastguard Worker      */
178*0e209d39SAndroid Build Coastguard Worker     int32_t size() const;
179*0e209d39SAndroid Build Coastguard Worker 
180*0e209d39SAndroid Build Coastguard Worker private:
181*0e209d39SAndroid Build Coastguard Worker     UnicodeString *strings;
182*0e209d39SAndroid Build Coastguard Worker     int32_t listMax;
183*0e209d39SAndroid Build Coastguard Worker     int32_t listSize;
184*0e209d39SAndroid Build Coastguard Worker };
185*0e209d39SAndroid Build Coastguard Worker 
186*0e209d39SAndroid Build Coastguard Worker 
/*
 * Forward declarations of internal classes. Only pointers to these types
 * are needed in this header, so their definitions are not required here.
 */
class StringToCEsMap;
class CEToStringsMap;
192*0e209d39SAndroid Build Coastguard Worker 
193*0e209d39SAndroid Build Coastguard Worker /**
194*0e209d39SAndroid Build Coastguard Worker  * CollData
195*0e209d39SAndroid Build Coastguard Worker  *
196*0e209d39SAndroid Build Coastguard Worker  * This class holds the Collator-specific data needed to
197*0e209d39SAndroid Build Coastguard Worker  * compute the length of the shortest string that can
198*0e209d39SAndroid Build Coastguard Worker  * generate a particular list of CEs.
199*0e209d39SAndroid Build Coastguard Worker  *
200*0e209d39SAndroid Build Coastguard Worker  * <code>CollData</code> objects are quite expensive to compute. Because
201*0e209d39SAndroid Build Coastguard Worker  * of this, they are cached. When you call <code>CollData::open</code> it
202*0e209d39SAndroid Build Coastguard Worker  * returns a reference counted cached object. When you call <code>CollData::close</code>
203*0e209d39SAndroid Build Coastguard Worker  * the reference count on the object is decremented but the object is not deleted.
204*0e209d39SAndroid Build Coastguard Worker  *
205*0e209d39SAndroid Build Coastguard Worker  * If you do not need to reuse any unreferenced objects in the cache, you can call
206*0e209d39SAndroid Build Coastguard Worker  * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code>
207*0e209d39SAndroid Build Coastguard Worker  * objects, you can call <code>CollData::freeCollDataCache</code>
208*0e209d39SAndroid Build Coastguard Worker  */
209*0e209d39SAndroid Build Coastguard Worker class CollData
210*0e209d39SAndroid Build Coastguard Worker {
211*0e209d39SAndroid Build Coastguard Worker public:
212*0e209d39SAndroid Build Coastguard Worker     /**
213*0e209d39SAndroid Build Coastguard Worker      * Construct a <code>CollData</code> object.
214*0e209d39SAndroid Build Coastguard Worker      *
215*0e209d39SAndroid Build Coastguard Worker      * @param collator - the collator
216*0e209d39SAndroid Build Coastguard Worker      * @param status - will be set if any errors occur.
217*0e209d39SAndroid Build Coastguard Worker      */
218*0e209d39SAndroid Build Coastguard Worker     CollData(UCollator *collator, UErrorCode &status);
219*0e209d39SAndroid Build Coastguard Worker 
220*0e209d39SAndroid Build Coastguard Worker     /**
221*0e209d39SAndroid Build Coastguard Worker      * The destructor.
222*0e209d39SAndroid Build Coastguard Worker      */
223*0e209d39SAndroid Build Coastguard Worker     ~CollData();
224*0e209d39SAndroid Build Coastguard Worker 
225*0e209d39SAndroid Build Coastguard Worker     /**
226*0e209d39SAndroid Build Coastguard Worker      * Get the <code>UCollator</code> object used to create this object.
227*0e209d39SAndroid Build Coastguard Worker      * The object returned may not be the exact object that was used to
228*0e209d39SAndroid Build Coastguard Worker      * create this object, but it will have the same behavior.
229*0e209d39SAndroid Build Coastguard Worker      */
230*0e209d39SAndroid Build Coastguard Worker     UCollator *getCollator() const;
231*0e209d39SAndroid Build Coastguard Worker 
232*0e209d39SAndroid Build Coastguard Worker     /**
233*0e209d39SAndroid Build Coastguard Worker      * Get a list of all the strings which generate a list
234*0e209d39SAndroid Build Coastguard Worker      * of CEs starting with a given CE.
235*0e209d39SAndroid Build Coastguard Worker      *
236*0e209d39SAndroid Build Coastguard Worker      * @param ce - the CE
237*0e209d39SAndroid Build Coastguard Worker      *
238*0e209d39SAndroid Build Coastguard Worker      * return a <code>StringList</code> object containing all
239*0e209d39SAndroid Build Coastguard Worker      *        the strings, or <code>nullptr</code> if there are
240*0e209d39SAndroid Build Coastguard Worker      *        no such strings.
241*0e209d39SAndroid Build Coastguard Worker      */
242*0e209d39SAndroid Build Coastguard Worker     const StringList *getStringList(int32_t ce) const;
243*0e209d39SAndroid Build Coastguard Worker 
244*0e209d39SAndroid Build Coastguard Worker     /**
245*0e209d39SAndroid Build Coastguard Worker      * Get a list of the CEs generated by a particular string.
246*0e209d39SAndroid Build Coastguard Worker      *
247*0e209d39SAndroid Build Coastguard Worker      * @param string - the string
248*0e209d39SAndroid Build Coastguard Worker      *
249*0e209d39SAndroid Build Coastguard Worker      * @return a <code>CEList</code> object containing the CEs. You
250*0e209d39SAndroid Build Coastguard Worker      *         must call <code>freeCEList</code> when you are finished
251*0e209d39SAndroid Build Coastguard Worker      *         using the <code>CEList</code>/
252*0e209d39SAndroid Build Coastguard Worker      */
253*0e209d39SAndroid Build Coastguard Worker     const CEList *getCEList(const UnicodeString *string) const;
254*0e209d39SAndroid Build Coastguard Worker 
255*0e209d39SAndroid Build Coastguard Worker     /**
256*0e209d39SAndroid Build Coastguard Worker      * Release a <code>CEList</code> returned by <code>getCEList</code>.
257*0e209d39SAndroid Build Coastguard Worker      *
258*0e209d39SAndroid Build Coastguard Worker      * @param list - the <code>CEList</code> to free.
259*0e209d39SAndroid Build Coastguard Worker      */
260*0e209d39SAndroid Build Coastguard Worker     void freeCEList(const CEList *list);
261*0e209d39SAndroid Build Coastguard Worker 
262*0e209d39SAndroid Build Coastguard Worker     /**
263*0e209d39SAndroid Build Coastguard Worker      * Return the length of the shortest string that will generate
264*0e209d39SAndroid Build Coastguard Worker      * the given list of CEs.
265*0e209d39SAndroid Build Coastguard Worker      *
266*0e209d39SAndroid Build Coastguard Worker      * @param ces - the CEs
267*0e209d39SAndroid Build Coastguard Worker      * @param offset - the offset of the first CE in the list to use.
268*0e209d39SAndroid Build Coastguard Worker      *
269*0e209d39SAndroid Build Coastguard Worker      * @return the length of the shortest string.
270*0e209d39SAndroid Build Coastguard Worker      */
271*0e209d39SAndroid Build Coastguard Worker     int32_t minLengthInChars(const CEList *ces, int32_t offset) const;
272*0e209d39SAndroid Build Coastguard Worker 
273*0e209d39SAndroid Build Coastguard Worker 
274*0e209d39SAndroid Build Coastguard Worker     /**
275*0e209d39SAndroid Build Coastguard Worker      * Return the length of the shortest string that will generate
276*0e209d39SAndroid Build Coastguard Worker      * the given list of CEs.
277*0e209d39SAndroid Build Coastguard Worker      *
278*0e209d39SAndroid Build Coastguard Worker      * Note: the algorithm used to do this computation is recursive. To
279*0e209d39SAndroid Build Coastguard Worker      * limit the amount of recursion, a "history" list is used to record
280*0e209d39SAndroid Build Coastguard Worker      * the best answer starting at a particular offset in the list of CEs.
281*0e209d39SAndroid Build Coastguard Worker      * If the same offset is visited again during the recursion, the answer
282*0e209d39SAndroid Build Coastguard Worker      * in the history list is used.
283*0e209d39SAndroid Build Coastguard Worker      *
284*0e209d39SAndroid Build Coastguard Worker      * @param ces - the CEs
285*0e209d39SAndroid Build Coastguard Worker      * @param offset - the offset of the first CE in the list to use.
286*0e209d39SAndroid Build Coastguard Worker      * @param history - the history list. Must be at least as long as
287*0e209d39SAndroid Build Coastguard Worker      *                 the number of cEs in the <code>CEList</code>
288*0e209d39SAndroid Build Coastguard Worker      *
289*0e209d39SAndroid Build Coastguard Worker      * @return the length of the shortest string.
290*0e209d39SAndroid Build Coastguard Worker      */
291*0e209d39SAndroid Build Coastguard Worker    int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const;
292*0e209d39SAndroid Build Coastguard Worker 
293*0e209d39SAndroid Build Coastguard Worker private:
294*0e209d39SAndroid Build Coastguard Worker     UCollator      *coll;
295*0e209d39SAndroid Build Coastguard Worker     CEToStringsMap *ceToCharsStartingWith;
296*0e209d39SAndroid Build Coastguard Worker 
297*0e209d39SAndroid Build Coastguard Worker     uint32_t minHan;
298*0e209d39SAndroid Build Coastguard Worker     uint32_t maxHan;
299*0e209d39SAndroid Build Coastguard Worker 
300*0e209d39SAndroid Build Coastguard Worker     uint32_t jamoLimits[4];
301*0e209d39SAndroid Build Coastguard Worker };
302*0e209d39SAndroid Build Coastguard Worker 
303*0e209d39SAndroid Build Coastguard Worker #endif // #if !UCONFIG_NO_COLLATION
304*0e209d39SAndroid Build Coastguard Worker #endif // #ifndef COLL_DATA_H
305