1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker /* 4*0e209d39SAndroid Build Coastguard Worker ****************************************************************************** 5*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 1996-2012, International Business Machines * 6*0e209d39SAndroid Build Coastguard Worker * Corporation and others. All Rights Reserved. * 7*0e209d39SAndroid Build Coastguard Worker ****************************************************************************** 8*0e209d39SAndroid Build Coastguard Worker */ 9*0e209d39SAndroid Build Coastguard Worker 10*0e209d39SAndroid Build Coastguard Worker /** 11*0e209d39SAndroid Build Coastguard Worker * \file 12*0e209d39SAndroid Build Coastguard Worker * \brief Originally, added as C++ API for Collation data used to compute minLengthInChars 13*0e209d39SAndroid Build Coastguard Worker * \internal 14*0e209d39SAndroid Build Coastguard Worker */ 15*0e209d39SAndroid Build Coastguard Worker 16*0e209d39SAndroid Build Coastguard Worker /* 17*0e209d39SAndroid Build Coastguard Worker * Note: This module was included in ICU 4.0.1 as @internal technology preview for supporting 18*0e209d39SAndroid Build Coastguard Worker * Boyer-Moore string search API. For now, only SSearchTest depends on this module. 19*0e209d39SAndroid Build Coastguard Worker * I temporarily moved the module from i18n directory to intltest, because we have no plan to 20*0e209d39SAndroid Build Coastguard Worker * publish this as public API. (2012-12-18 yoshito) 21*0e209d39SAndroid Build Coastguard Worker */ 22*0e209d39SAndroid Build Coastguard Worker 23*0e209d39SAndroid Build Coastguard Worker #ifndef COLL_DATA_H 24*0e209d39SAndroid Build Coastguard Worker #define COLL_DATA_H 25*0e209d39SAndroid Build Coastguard Worker 26*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 27*0e209d39SAndroid Build Coastguard Worker 28*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_COLLATION 29*0e209d39SAndroid Build Coastguard Worker 30*0e209d39SAndroid Build Coastguard Worker #include "unicode/ucol.h" 31*0e209d39SAndroid Build Coastguard Worker #include "unicode/unistr.h" 32*0e209d39SAndroid Build Coastguard Worker 33*0e209d39SAndroid Build Coastguard Worker /** 34*0e209d39SAndroid Build Coastguard Worker * The size of the internal CE buffer in a <code>CEList</code> object 35*0e209d39SAndroid Build Coastguard Worker */ 36*0e209d39SAndroid Build Coastguard Worker #define CELIST_BUFFER_SIZE 4 37*0e209d39SAndroid Build Coastguard Worker 38*0e209d39SAndroid Build Coastguard Worker /** 39*0e209d39SAndroid Build Coastguard Worker * \def INSTRUMENT_CELIST 40*0e209d39SAndroid Build Coastguard Worker * Define this to enable the <code>CEList</code> objects to collect 41*0e209d39SAndroid Build Coastguard Worker * statistics. 42*0e209d39SAndroid Build Coastguard Worker */ 43*0e209d39SAndroid Build Coastguard Worker 44*0e209d39SAndroid Build Coastguard Worker /** 45*0e209d39SAndroid Build Coastguard Worker * The size of the initial list in a <code>StringList</code> object. 46*0e209d39SAndroid Build Coastguard Worker */ 47*0e209d39SAndroid Build Coastguard Worker #define STRING_LIST_BUFFER_SIZE 16 48*0e209d39SAndroid Build Coastguard Worker 49*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_USE 50*0e209d39SAndroid Build Coastguard Worker 51*0e209d39SAndroid Build Coastguard Worker /** 52*0e209d39SAndroid Build Coastguard Worker * This object holds a list of CEs generated from a particular 53*0e209d39SAndroid Build Coastguard Worker * <code>UnicodeString</code> 54*0e209d39SAndroid Build Coastguard Worker * 55*0e209d39SAndroid Build Coastguard Worker */ 56*0e209d39SAndroid Build Coastguard Worker class CEList 57*0e209d39SAndroid Build Coastguard Worker { 58*0e209d39SAndroid Build Coastguard Worker public: 59*0e209d39SAndroid Build Coastguard Worker /** 60*0e209d39SAndroid Build Coastguard Worker * Construct a <code>CEList</code> object. 61*0e209d39SAndroid Build Coastguard Worker * 62*0e209d39SAndroid Build Coastguard Worker * @param coll - the Collator used to collect the CEs. 63*0e209d39SAndroid Build Coastguard Worker * @param string - the string for which to collect the CEs. 64*0e209d39SAndroid Build Coastguard Worker * @param status - will be set if any errors occur. 65*0e209d39SAndroid Build Coastguard Worker * 66*0e209d39SAndroid Build Coastguard Worker * Note: if on return, status is set to an error code, 67*0e209d39SAndroid Build Coastguard Worker * the only safe thing to do with this object is to call 68*0e209d39SAndroid Build Coastguard Worker * the destructor. 69*0e209d39SAndroid Build Coastguard Worker */ 70*0e209d39SAndroid Build Coastguard Worker CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status); 71*0e209d39SAndroid Build Coastguard Worker 72*0e209d39SAndroid Build Coastguard Worker /** 73*0e209d39SAndroid Build Coastguard Worker * The destructor. 74*0e209d39SAndroid Build Coastguard Worker */ 75*0e209d39SAndroid Build Coastguard Worker ~CEList(); 76*0e209d39SAndroid Build Coastguard Worker 77*0e209d39SAndroid Build Coastguard Worker /** 78*0e209d39SAndroid Build Coastguard Worker * Return the number of CEs in the list. 79*0e209d39SAndroid Build Coastguard Worker * 80*0e209d39SAndroid Build Coastguard Worker * @return the number of CEs in the list. 81*0e209d39SAndroid Build Coastguard Worker */ 82*0e209d39SAndroid Build Coastguard Worker int32_t size() const; 83*0e209d39SAndroid Build Coastguard Worker 84*0e209d39SAndroid Build Coastguard Worker /** 85*0e209d39SAndroid Build Coastguard Worker * Get a particular CE from the list. 86*0e209d39SAndroid Build Coastguard Worker * 87*0e209d39SAndroid Build Coastguard Worker * @param index - the index of the CE to return 88*0e209d39SAndroid Build Coastguard Worker * 89*0e209d39SAndroid Build Coastguard Worker * @return the CE, or <code>0</code> if <code>index</code> is out of range 90*0e209d39SAndroid Build Coastguard Worker */ 91*0e209d39SAndroid Build Coastguard Worker uint32_t get(int32_t index) const; 92*0e209d39SAndroid Build Coastguard Worker 93*0e209d39SAndroid Build Coastguard Worker /** 94*0e209d39SAndroid Build Coastguard Worker * Check if the CEs in another <code>CEList</code> match the 95*0e209d39SAndroid Build Coastguard Worker * suffix of this list starting at a give offset. 96*0e209d39SAndroid Build Coastguard Worker * 97*0e209d39SAndroid Build Coastguard Worker * @param offset - the offset of the suffix 98*0e209d39SAndroid Build Coastguard Worker * @param other - the other <code>CEList</code> 99*0e209d39SAndroid Build Coastguard Worker * 100*0e209d39SAndroid Build Coastguard Worker * @return <code>true</code> if the CEs match, <code>false</code> otherwise. 101*0e209d39SAndroid Build Coastguard Worker */ 102*0e209d39SAndroid Build Coastguard Worker UBool matchesAt(int32_t offset, const CEList *other) const; 103*0e209d39SAndroid Build Coastguard Worker 104*0e209d39SAndroid Build Coastguard Worker /** 105*0e209d39SAndroid Build Coastguard Worker * The index operator. 106*0e209d39SAndroid Build Coastguard Worker * 107*0e209d39SAndroid Build Coastguard Worker * @param index - the index 108*0e209d39SAndroid Build Coastguard Worker * 109*0e209d39SAndroid Build Coastguard Worker * @return a reference to the given CE in the list 110*0e209d39SAndroid Build Coastguard Worker */ 111*0e209d39SAndroid Build Coastguard Worker uint32_t &operator[](int32_t index) const; 112*0e209d39SAndroid Build Coastguard Worker 113*0e209d39SAndroid Build Coastguard Worker private: 114*0e209d39SAndroid Build Coastguard Worker void add(uint32_t ce, UErrorCode &status); 115*0e209d39SAndroid Build Coastguard Worker 116*0e209d39SAndroid Build Coastguard Worker uint32_t ceBuffer[CELIST_BUFFER_SIZE]; 117*0e209d39SAndroid Build Coastguard Worker uint32_t *ces; 118*0e209d39SAndroid Build Coastguard Worker int32_t listMax; 119*0e209d39SAndroid Build Coastguard Worker int32_t listSize; 120*0e209d39SAndroid Build Coastguard Worker }; 121*0e209d39SAndroid Build Coastguard Worker 122*0e209d39SAndroid Build Coastguard Worker /** 123*0e209d39SAndroid Build Coastguard Worker * StringList 124*0e209d39SAndroid Build Coastguard Worker * 125*0e209d39SAndroid Build Coastguard Worker * This object holds a list of <code>UnicodeString</code> objects. 126*0e209d39SAndroid Build Coastguard Worker */ 127*0e209d39SAndroid Build Coastguard Worker class StringList 128*0e209d39SAndroid Build Coastguard Worker { 129*0e209d39SAndroid Build Coastguard Worker public: 130*0e209d39SAndroid Build Coastguard Worker /** 131*0e209d39SAndroid Build Coastguard Worker * Construct an empty <code>StringList</code> 132*0e209d39SAndroid Build Coastguard Worker * 133*0e209d39SAndroid Build Coastguard Worker * @param status - will be set if any errors occur. 134*0e209d39SAndroid Build Coastguard Worker * 135*0e209d39SAndroid Build Coastguard Worker * Note: if on return, status is set to an error code, 136*0e209d39SAndroid Build Coastguard Worker * the only safe thing to do with this object is to call 137*0e209d39SAndroid Build Coastguard Worker * the destructor. 138*0e209d39SAndroid Build Coastguard Worker */ 139*0e209d39SAndroid Build Coastguard Worker StringList(UErrorCode &status); 140*0e209d39SAndroid Build Coastguard Worker 141*0e209d39SAndroid Build Coastguard Worker /** 142*0e209d39SAndroid Build Coastguard Worker * The destructor. 143*0e209d39SAndroid Build Coastguard Worker */ 144*0e209d39SAndroid Build Coastguard Worker ~StringList(); 145*0e209d39SAndroid Build Coastguard Worker 146*0e209d39SAndroid Build Coastguard Worker /** 147*0e209d39SAndroid Build Coastguard Worker * Add a string to the list. 148*0e209d39SAndroid Build Coastguard Worker * 149*0e209d39SAndroid Build Coastguard Worker * @param string - the string to add 150*0e209d39SAndroid Build Coastguard Worker * @param status - will be set if any errors occur. 151*0e209d39SAndroid Build Coastguard Worker */ 152*0e209d39SAndroid Build Coastguard Worker void add(const UnicodeString *string, UErrorCode &status); 153*0e209d39SAndroid Build Coastguard Worker 154*0e209d39SAndroid Build Coastguard Worker /** 155*0e209d39SAndroid Build Coastguard Worker * Add an array of Unicode code points to the list. 156*0e209d39SAndroid Build Coastguard Worker * 157*0e209d39SAndroid Build Coastguard Worker * @param chars - the address of the array of code points 158*0e209d39SAndroid Build Coastguard Worker * @param count - the number of code points in the array 159*0e209d39SAndroid Build Coastguard Worker * @param status - will be set if any errors occur. 160*0e209d39SAndroid Build Coastguard Worker */ 161*0e209d39SAndroid Build Coastguard Worker void add(const char16_t *chars, int32_t count, UErrorCode &status); 162*0e209d39SAndroid Build Coastguard Worker 163*0e209d39SAndroid Build Coastguard Worker /** 164*0e209d39SAndroid Build Coastguard Worker * Get a particular string from the list. 165*0e209d39SAndroid Build Coastguard Worker * 166*0e209d39SAndroid Build Coastguard Worker * @param index - the index of the string 167*0e209d39SAndroid Build Coastguard Worker * 168*0e209d39SAndroid Build Coastguard Worker * @return a pointer to the <code>UnicodeString</code> or <code>nullptr</code> 169*0e209d39SAndroid Build Coastguard Worker * if <code>index</code> is out of bounds. 170*0e209d39SAndroid Build Coastguard Worker */ 171*0e209d39SAndroid Build Coastguard Worker const UnicodeString *get(int32_t index) const; 172*0e209d39SAndroid Build Coastguard Worker 173*0e209d39SAndroid Build Coastguard Worker /** 174*0e209d39SAndroid Build Coastguard Worker * Get the number of strings in the list. 175*0e209d39SAndroid Build Coastguard Worker * 176*0e209d39SAndroid Build Coastguard Worker * @return the number of strings in the list. 177*0e209d39SAndroid Build Coastguard Worker */ 178*0e209d39SAndroid Build Coastguard Worker int32_t size() const; 179*0e209d39SAndroid Build Coastguard Worker 180*0e209d39SAndroid Build Coastguard Worker private: 181*0e209d39SAndroid Build Coastguard Worker UnicodeString *strings; 182*0e209d39SAndroid Build Coastguard Worker int32_t listMax; 183*0e209d39SAndroid Build Coastguard Worker int32_t listSize; 184*0e209d39SAndroid Build Coastguard Worker }; 185*0e209d39SAndroid Build Coastguard Worker 186*0e209d39SAndroid Build Coastguard Worker 187*0e209d39SAndroid Build Coastguard Worker /* 188*0e209d39SAndroid Build Coastguard Worker * Forward references to internal classes. 189*0e209d39SAndroid Build Coastguard Worker */ 190*0e209d39SAndroid Build Coastguard Worker class StringToCEsMap; 191*0e209d39SAndroid Build Coastguard Worker class CEToStringsMap; 192*0e209d39SAndroid Build Coastguard Worker 193*0e209d39SAndroid Build Coastguard Worker /** 194*0e209d39SAndroid Build Coastguard Worker * CollData 195*0e209d39SAndroid Build Coastguard Worker * 196*0e209d39SAndroid Build Coastguard Worker * This class holds the Collator-specific data needed to 197*0e209d39SAndroid Build Coastguard Worker * compute the length of the shortest string that can 198*0e209d39SAndroid Build Coastguard Worker * generate a particular list of CEs. 199*0e209d39SAndroid Build Coastguard Worker * 200*0e209d39SAndroid Build Coastguard Worker * <code>CollData</code> objects are quite expensive to compute. Because 201*0e209d39SAndroid Build Coastguard Worker * of this, they are cached. When you call <code>CollData::open</code> it 202*0e209d39SAndroid Build Coastguard Worker * returns a reference counted cached object. When you call <code>CollData::close</code> 203*0e209d39SAndroid Build Coastguard Worker * the reference count on the object is decremented but the object is not deleted. 204*0e209d39SAndroid Build Coastguard Worker * 205*0e209d39SAndroid Build Coastguard Worker * If you do not need to reuse any unreferenced objects in the cache, you can call 206*0e209d39SAndroid Build Coastguard Worker * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code> 207*0e209d39SAndroid Build Coastguard Worker * objects, you can call <code>CollData::freeCollDataCache</code> 208*0e209d39SAndroid Build Coastguard Worker */ 209*0e209d39SAndroid Build Coastguard Worker class CollData 210*0e209d39SAndroid Build Coastguard Worker { 211*0e209d39SAndroid Build Coastguard Worker public: 212*0e209d39SAndroid Build Coastguard Worker /** 213*0e209d39SAndroid Build Coastguard Worker * Construct a <code>CollData</code> object. 214*0e209d39SAndroid Build Coastguard Worker * 215*0e209d39SAndroid Build Coastguard Worker * @param collator - the collator 216*0e209d39SAndroid Build Coastguard Worker * @param status - will be set if any errors occur. 217*0e209d39SAndroid Build Coastguard Worker */ 218*0e209d39SAndroid Build Coastguard Worker CollData(UCollator *collator, UErrorCode &status); 219*0e209d39SAndroid Build Coastguard Worker 220*0e209d39SAndroid Build Coastguard Worker /** 221*0e209d39SAndroid Build Coastguard Worker * The destructor. 222*0e209d39SAndroid Build Coastguard Worker */ 223*0e209d39SAndroid Build Coastguard Worker ~CollData(); 224*0e209d39SAndroid Build Coastguard Worker 225*0e209d39SAndroid Build Coastguard Worker /** 226*0e209d39SAndroid Build Coastguard Worker * Get the <code>UCollator</code> object used to create this object. 227*0e209d39SAndroid Build Coastguard Worker * The object returned may not be the exact object that was used to 228*0e209d39SAndroid Build Coastguard Worker * create this object, but it will have the same behavior. 229*0e209d39SAndroid Build Coastguard Worker */ 230*0e209d39SAndroid Build Coastguard Worker UCollator *getCollator() const; 231*0e209d39SAndroid Build Coastguard Worker 232*0e209d39SAndroid Build Coastguard Worker /** 233*0e209d39SAndroid Build Coastguard Worker * Get a list of all the strings which generate a list 234*0e209d39SAndroid Build Coastguard Worker * of CEs starting with a given CE. 235*0e209d39SAndroid Build Coastguard Worker * 236*0e209d39SAndroid Build Coastguard Worker * @param ce - the CE 237*0e209d39SAndroid Build Coastguard Worker * 238*0e209d39SAndroid Build Coastguard Worker * return a <code>StringList</code> object containing all 239*0e209d39SAndroid Build Coastguard Worker * the strings, or <code>nullptr</code> if there are 240*0e209d39SAndroid Build Coastguard Worker * no such strings. 241*0e209d39SAndroid Build Coastguard Worker */ 242*0e209d39SAndroid Build Coastguard Worker const StringList *getStringList(int32_t ce) const; 243*0e209d39SAndroid Build Coastguard Worker 244*0e209d39SAndroid Build Coastguard Worker /** 245*0e209d39SAndroid Build Coastguard Worker * Get a list of the CEs generated by a particular string. 246*0e209d39SAndroid Build Coastguard Worker * 247*0e209d39SAndroid Build Coastguard Worker * @param string - the string 248*0e209d39SAndroid Build Coastguard Worker * 249*0e209d39SAndroid Build Coastguard Worker * @return a <code>CEList</code> object containing the CEs. You 250*0e209d39SAndroid Build Coastguard Worker * must call <code>freeCEList</code> when you are finished 251*0e209d39SAndroid Build Coastguard Worker * using the <code>CEList</code>/ 252*0e209d39SAndroid Build Coastguard Worker */ 253*0e209d39SAndroid Build Coastguard Worker const CEList *getCEList(const UnicodeString *string) const; 254*0e209d39SAndroid Build Coastguard Worker 255*0e209d39SAndroid Build Coastguard Worker /** 256*0e209d39SAndroid Build Coastguard Worker * Release a <code>CEList</code> returned by <code>getCEList</code>. 257*0e209d39SAndroid Build Coastguard Worker * 258*0e209d39SAndroid Build Coastguard Worker * @param list - the <code>CEList</code> to free. 259*0e209d39SAndroid Build Coastguard Worker */ 260*0e209d39SAndroid Build Coastguard Worker void freeCEList(const CEList *list); 261*0e209d39SAndroid Build Coastguard Worker 262*0e209d39SAndroid Build Coastguard Worker /** 263*0e209d39SAndroid Build Coastguard Worker * Return the length of the shortest string that will generate 264*0e209d39SAndroid Build Coastguard Worker * the given list of CEs. 265*0e209d39SAndroid Build Coastguard Worker * 266*0e209d39SAndroid Build Coastguard Worker * @param ces - the CEs 267*0e209d39SAndroid Build Coastguard Worker * @param offset - the offset of the first CE in the list to use. 268*0e209d39SAndroid Build Coastguard Worker * 269*0e209d39SAndroid Build Coastguard Worker * @return the length of the shortest string. 270*0e209d39SAndroid Build Coastguard Worker */ 271*0e209d39SAndroid Build Coastguard Worker int32_t minLengthInChars(const CEList *ces, int32_t offset) const; 272*0e209d39SAndroid Build Coastguard Worker 273*0e209d39SAndroid Build Coastguard Worker 274*0e209d39SAndroid Build Coastguard Worker /** 275*0e209d39SAndroid Build Coastguard Worker * Return the length of the shortest string that will generate 276*0e209d39SAndroid Build Coastguard Worker * the given list of CEs. 277*0e209d39SAndroid Build Coastguard Worker * 278*0e209d39SAndroid Build Coastguard Worker * Note: the algorithm used to do this computation is recursive. To 279*0e209d39SAndroid Build Coastguard Worker * limit the amount of recursion, a "history" list is used to record 280*0e209d39SAndroid Build Coastguard Worker * the best answer starting at a particular offset in the list of CEs. 281*0e209d39SAndroid Build Coastguard Worker * If the same offset is visited again during the recursion, the answer 282*0e209d39SAndroid Build Coastguard Worker * in the history list is used. 283*0e209d39SAndroid Build Coastguard Worker * 284*0e209d39SAndroid Build Coastguard Worker * @param ces - the CEs 285*0e209d39SAndroid Build Coastguard Worker * @param offset - the offset of the first CE in the list to use. 286*0e209d39SAndroid Build Coastguard Worker * @param history - the history list. Must be at least as long as 287*0e209d39SAndroid Build Coastguard Worker * the number of cEs in the <code>CEList</code> 288*0e209d39SAndroid Build Coastguard Worker * 289*0e209d39SAndroid Build Coastguard Worker * @return the length of the shortest string. 290*0e209d39SAndroid Build Coastguard Worker */ 291*0e209d39SAndroid Build Coastguard Worker int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const; 292*0e209d39SAndroid Build Coastguard Worker 293*0e209d39SAndroid Build Coastguard Worker private: 294*0e209d39SAndroid Build Coastguard Worker UCollator *coll; 295*0e209d39SAndroid Build Coastguard Worker CEToStringsMap *ceToCharsStartingWith; 296*0e209d39SAndroid Build Coastguard Worker 297*0e209d39SAndroid Build Coastguard Worker uint32_t minHan; 298*0e209d39SAndroid Build Coastguard Worker uint32_t maxHan; 299*0e209d39SAndroid Build Coastguard Worker 300*0e209d39SAndroid Build Coastguard Worker uint32_t jamoLimits[4]; 301*0e209d39SAndroid Build Coastguard Worker }; 302*0e209d39SAndroid Build Coastguard Worker 303*0e209d39SAndroid Build Coastguard Worker #endif // #if !UCONFIG_NO_COLLATION 304*0e209d39SAndroid Build Coastguard Worker #endif // #ifndef COLL_DATA_H 305