xref: /aosp_15_r20/external/icu/icu4c/source/common/ucharstrieiterator.cpp (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
5*0e209d39SAndroid Build Coastguard Worker *   Copyright (C) 2010-2011, International Business Machines
6*0e209d39SAndroid Build Coastguard Worker *   Corporation and others.  All Rights Reserved.
7*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
8*0e209d39SAndroid Build Coastguard Worker *   file name:  ucharstrieiterator.h
9*0e209d39SAndroid Build Coastguard Worker *   encoding:   UTF-8
10*0e209d39SAndroid Build Coastguard Worker *   tab size:   8 (not used)
11*0e209d39SAndroid Build Coastguard Worker *   indentation:4
12*0e209d39SAndroid Build Coastguard Worker *
13*0e209d39SAndroid Build Coastguard Worker *   created on: 2010nov15
14*0e209d39SAndroid Build Coastguard Worker *   created by: Markus W. Scherer
15*0e209d39SAndroid Build Coastguard Worker */
16*0e209d39SAndroid Build Coastguard Worker 
17*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
18*0e209d39SAndroid Build Coastguard Worker #include "unicode/ucharstrie.h"
19*0e209d39SAndroid Build Coastguard Worker #include "unicode/unistr.h"
20*0e209d39SAndroid Build Coastguard Worker #include "uvectr32.h"
21*0e209d39SAndroid Build Coastguard Worker 
22*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
23*0e209d39SAndroid Build Coastguard Worker 
Iterator(ConstChar16Ptr trieUChars,int32_t maxStringLength,UErrorCode & errorCode)24*0e209d39SAndroid Build Coastguard Worker UCharsTrie::Iterator::Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength,
25*0e209d39SAndroid Build Coastguard Worker                                UErrorCode &errorCode)
26*0e209d39SAndroid Build Coastguard Worker         : uchars_(trieUChars),
27*0e209d39SAndroid Build Coastguard Worker           pos_(uchars_), initialPos_(uchars_),
28*0e209d39SAndroid Build Coastguard Worker           remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
29*0e209d39SAndroid Build Coastguard Worker           skipValue_(false),
30*0e209d39SAndroid Build Coastguard Worker           maxLength_(maxStringLength), value_(0), stack_(nullptr) {
31*0e209d39SAndroid Build Coastguard Worker     if(U_FAILURE(errorCode)) {
32*0e209d39SAndroid Build Coastguard Worker         return;
33*0e209d39SAndroid Build Coastguard Worker     }
34*0e209d39SAndroid Build Coastguard Worker     // stack_ is a pointer so that it's easy to turn ucharstrie.h into
35*0e209d39SAndroid Build Coastguard Worker     // a public API header for which we would want it to depend only on
36*0e209d39SAndroid Build Coastguard Worker     // other public headers.
37*0e209d39SAndroid Build Coastguard Worker     // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway
38*0e209d39SAndroid Build Coastguard Worker     // via the UnicodeString and UVector32 implementations, so this additional
39*0e209d39SAndroid Build Coastguard Worker     // cost is minimal.
40*0e209d39SAndroid Build Coastguard Worker     stack_=new UVector32(errorCode);
41*0e209d39SAndroid Build Coastguard Worker     if(stack_==nullptr) {
42*0e209d39SAndroid Build Coastguard Worker         errorCode=U_MEMORY_ALLOCATION_ERROR;
43*0e209d39SAndroid Build Coastguard Worker     }
44*0e209d39SAndroid Build Coastguard Worker }
45*0e209d39SAndroid Build Coastguard Worker 
Iterator(const UCharsTrie & trie,int32_t maxStringLength,UErrorCode & errorCode)46*0e209d39SAndroid Build Coastguard Worker UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength,
47*0e209d39SAndroid Build Coastguard Worker                                UErrorCode &errorCode)
48*0e209d39SAndroid Build Coastguard Worker         : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_),
49*0e209d39SAndroid Build Coastguard Worker           remainingMatchLength_(trie.remainingMatchLength_),
50*0e209d39SAndroid Build Coastguard Worker           initialRemainingMatchLength_(trie.remainingMatchLength_),
51*0e209d39SAndroid Build Coastguard Worker           skipValue_(false),
52*0e209d39SAndroid Build Coastguard Worker           maxLength_(maxStringLength), value_(0), stack_(nullptr) {
53*0e209d39SAndroid Build Coastguard Worker     if(U_FAILURE(errorCode)) {
54*0e209d39SAndroid Build Coastguard Worker         return;
55*0e209d39SAndroid Build Coastguard Worker     }
56*0e209d39SAndroid Build Coastguard Worker     stack_=new UVector32(errorCode);
57*0e209d39SAndroid Build Coastguard Worker     if(U_FAILURE(errorCode)) {
58*0e209d39SAndroid Build Coastguard Worker         return;
59*0e209d39SAndroid Build Coastguard Worker     }
60*0e209d39SAndroid Build Coastguard Worker     if(stack_==nullptr) {
61*0e209d39SAndroid Build Coastguard Worker         errorCode=U_MEMORY_ALLOCATION_ERROR;
62*0e209d39SAndroid Build Coastguard Worker         return;
63*0e209d39SAndroid Build Coastguard Worker     }
64*0e209d39SAndroid Build Coastguard Worker     int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
65*0e209d39SAndroid Build Coastguard Worker     if(length>=0) {
66*0e209d39SAndroid Build Coastguard Worker         // Pending linear-match node, append remaining UChars to str_.
67*0e209d39SAndroid Build Coastguard Worker         ++length;
68*0e209d39SAndroid Build Coastguard Worker         if(maxLength_>0 && length>maxLength_) {
69*0e209d39SAndroid Build Coastguard Worker             length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
70*0e209d39SAndroid Build Coastguard Worker         }
71*0e209d39SAndroid Build Coastguard Worker         str_.append(pos_, length);
72*0e209d39SAndroid Build Coastguard Worker         pos_+=length;
73*0e209d39SAndroid Build Coastguard Worker         remainingMatchLength_-=length;
74*0e209d39SAndroid Build Coastguard Worker     }
75*0e209d39SAndroid Build Coastguard Worker }
76*0e209d39SAndroid Build Coastguard Worker 
~Iterator()77*0e209d39SAndroid Build Coastguard Worker UCharsTrie::Iterator::~Iterator() {
78*0e209d39SAndroid Build Coastguard Worker     delete stack_;
79*0e209d39SAndroid Build Coastguard Worker }
80*0e209d39SAndroid Build Coastguard Worker 
81*0e209d39SAndroid Build Coastguard Worker UCharsTrie::Iterator &
reset()82*0e209d39SAndroid Build Coastguard Worker UCharsTrie::Iterator::reset() {
83*0e209d39SAndroid Build Coastguard Worker     pos_=initialPos_;
84*0e209d39SAndroid Build Coastguard Worker     remainingMatchLength_=initialRemainingMatchLength_;
85*0e209d39SAndroid Build Coastguard Worker     skipValue_=false;
86*0e209d39SAndroid Build Coastguard Worker     int32_t length=remainingMatchLength_+1;  // Remaining match length.
87*0e209d39SAndroid Build Coastguard Worker     if(maxLength_>0 && length>maxLength_) {
88*0e209d39SAndroid Build Coastguard Worker         length=maxLength_;
89*0e209d39SAndroid Build Coastguard Worker     }
90*0e209d39SAndroid Build Coastguard Worker     str_.truncate(length);
91*0e209d39SAndroid Build Coastguard Worker     pos_+=length;
92*0e209d39SAndroid Build Coastguard Worker     remainingMatchLength_-=length;
93*0e209d39SAndroid Build Coastguard Worker     stack_->setSize(0);
94*0e209d39SAndroid Build Coastguard Worker     return *this;
95*0e209d39SAndroid Build Coastguard Worker }
96*0e209d39SAndroid Build Coastguard Worker 
97*0e209d39SAndroid Build Coastguard Worker UBool
hasNext() const98*0e209d39SAndroid Build Coastguard Worker UCharsTrie::Iterator::hasNext() const { return pos_!=nullptr || !stack_->isEmpty(); }
99*0e209d39SAndroid Build Coastguard Worker 
100*0e209d39SAndroid Build Coastguard Worker UBool
next(UErrorCode & errorCode)101*0e209d39SAndroid Build Coastguard Worker UCharsTrie::Iterator::next(UErrorCode &errorCode) {
102*0e209d39SAndroid Build Coastguard Worker     if(U_FAILURE(errorCode)) {
103*0e209d39SAndroid Build Coastguard Worker         return false;
104*0e209d39SAndroid Build Coastguard Worker     }
105*0e209d39SAndroid Build Coastguard Worker     const char16_t *pos=pos_;
106*0e209d39SAndroid Build Coastguard Worker     if(pos==nullptr) {
107*0e209d39SAndroid Build Coastguard Worker         if(stack_->isEmpty()) {
108*0e209d39SAndroid Build Coastguard Worker             return false;
109*0e209d39SAndroid Build Coastguard Worker         }
110*0e209d39SAndroid Build Coastguard Worker         // Pop the state off the stack and continue with the next outbound edge of
111*0e209d39SAndroid Build Coastguard Worker         // the branch node.
112*0e209d39SAndroid Build Coastguard Worker         int32_t stackSize=stack_->size();
113*0e209d39SAndroid Build Coastguard Worker         int32_t length=stack_->elementAti(stackSize-1);
114*0e209d39SAndroid Build Coastguard Worker         pos=uchars_+stack_->elementAti(stackSize-2);
115*0e209d39SAndroid Build Coastguard Worker         stack_->setSize(stackSize-2);
116*0e209d39SAndroid Build Coastguard Worker         str_.truncate(length&0xffff);
117*0e209d39SAndroid Build Coastguard Worker         length=(int32_t)((uint32_t)length>>16);
118*0e209d39SAndroid Build Coastguard Worker         if(length>1) {
119*0e209d39SAndroid Build Coastguard Worker             pos=branchNext(pos, length, errorCode);
120*0e209d39SAndroid Build Coastguard Worker             if(pos==nullptr) {
121*0e209d39SAndroid Build Coastguard Worker                 return true;  // Reached a final value.
122*0e209d39SAndroid Build Coastguard Worker             }
123*0e209d39SAndroid Build Coastguard Worker         } else {
124*0e209d39SAndroid Build Coastguard Worker             str_.append(*pos++);
125*0e209d39SAndroid Build Coastguard Worker         }
126*0e209d39SAndroid Build Coastguard Worker     }
127*0e209d39SAndroid Build Coastguard Worker     if(remainingMatchLength_>=0) {
128*0e209d39SAndroid Build Coastguard Worker         // We only get here if we started in a pending linear-match node
129*0e209d39SAndroid Build Coastguard Worker         // with more than maxLength remaining units.
130*0e209d39SAndroid Build Coastguard Worker         return truncateAndStop();
131*0e209d39SAndroid Build Coastguard Worker     }
132*0e209d39SAndroid Build Coastguard Worker     for(;;) {
133*0e209d39SAndroid Build Coastguard Worker         int32_t node=*pos++;
134*0e209d39SAndroid Build Coastguard Worker         if(node>=kMinValueLead) {
135*0e209d39SAndroid Build Coastguard Worker             if(skipValue_) {
136*0e209d39SAndroid Build Coastguard Worker                 pos=skipNodeValue(pos, node);
137*0e209d39SAndroid Build Coastguard Worker                 node&=kNodeTypeMask;
138*0e209d39SAndroid Build Coastguard Worker                 skipValue_=false;
139*0e209d39SAndroid Build Coastguard Worker             } else {
140*0e209d39SAndroid Build Coastguard Worker                 // Deliver value for the string so far.
141*0e209d39SAndroid Build Coastguard Worker                 UBool isFinal=(UBool)(node>>15);
142*0e209d39SAndroid Build Coastguard Worker                 if(isFinal) {
143*0e209d39SAndroid Build Coastguard Worker                     value_=readValue(pos, node&0x7fff);
144*0e209d39SAndroid Build Coastguard Worker                 } else {
145*0e209d39SAndroid Build Coastguard Worker                     value_=readNodeValue(pos, node);
146*0e209d39SAndroid Build Coastguard Worker                 }
147*0e209d39SAndroid Build Coastguard Worker                 if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
148*0e209d39SAndroid Build Coastguard Worker                     pos_=nullptr;
149*0e209d39SAndroid Build Coastguard Worker                 } else {
150*0e209d39SAndroid Build Coastguard Worker                     // We cannot skip the value right here because it shares its
151*0e209d39SAndroid Build Coastguard Worker                     // lead unit with a match node which we have to evaluate
152*0e209d39SAndroid Build Coastguard Worker                     // next time.
153*0e209d39SAndroid Build Coastguard Worker                     // Instead, keep pos_ on the node lead unit itself.
154*0e209d39SAndroid Build Coastguard Worker                     pos_=pos-1;
155*0e209d39SAndroid Build Coastguard Worker                     skipValue_=true;
156*0e209d39SAndroid Build Coastguard Worker                 }
157*0e209d39SAndroid Build Coastguard Worker                 return true;
158*0e209d39SAndroid Build Coastguard Worker             }
159*0e209d39SAndroid Build Coastguard Worker         }
160*0e209d39SAndroid Build Coastguard Worker         if(maxLength_>0 && str_.length()==maxLength_) {
161*0e209d39SAndroid Build Coastguard Worker             return truncateAndStop();
162*0e209d39SAndroid Build Coastguard Worker         }
163*0e209d39SAndroid Build Coastguard Worker         if(node<kMinLinearMatch) {
164*0e209d39SAndroid Build Coastguard Worker             if(node==0) {
165*0e209d39SAndroid Build Coastguard Worker                 node=*pos++;
166*0e209d39SAndroid Build Coastguard Worker             }
167*0e209d39SAndroid Build Coastguard Worker             pos=branchNext(pos, node+1, errorCode);
168*0e209d39SAndroid Build Coastguard Worker             if(pos==nullptr) {
169*0e209d39SAndroid Build Coastguard Worker                 return true;  // Reached a final value.
170*0e209d39SAndroid Build Coastguard Worker             }
171*0e209d39SAndroid Build Coastguard Worker         } else {
172*0e209d39SAndroid Build Coastguard Worker             // Linear-match node, append length units to str_.
173*0e209d39SAndroid Build Coastguard Worker             int32_t length=node-kMinLinearMatch+1;
174*0e209d39SAndroid Build Coastguard Worker             if(maxLength_>0 && str_.length()+length>maxLength_) {
175*0e209d39SAndroid Build Coastguard Worker                 str_.append(pos, maxLength_-str_.length());
176*0e209d39SAndroid Build Coastguard Worker                 return truncateAndStop();
177*0e209d39SAndroid Build Coastguard Worker             }
178*0e209d39SAndroid Build Coastguard Worker             str_.append(pos, length);
179*0e209d39SAndroid Build Coastguard Worker             pos+=length;
180*0e209d39SAndroid Build Coastguard Worker         }
181*0e209d39SAndroid Build Coastguard Worker     }
182*0e209d39SAndroid Build Coastguard Worker }
183*0e209d39SAndroid Build Coastguard Worker 
184*0e209d39SAndroid Build Coastguard Worker // Branch node, needs to take the first outbound edge and push state for the rest.
185*0e209d39SAndroid Build Coastguard Worker const char16_t *
branchNext(const char16_t * pos,int32_t length,UErrorCode & errorCode)186*0e209d39SAndroid Build Coastguard Worker UCharsTrie::Iterator::branchNext(const char16_t *pos, int32_t length, UErrorCode &errorCode) {
187*0e209d39SAndroid Build Coastguard Worker     while(length>kMaxBranchLinearSubNodeLength) {
188*0e209d39SAndroid Build Coastguard Worker         ++pos;  // ignore the comparison unit
189*0e209d39SAndroid Build Coastguard Worker         // Push state for the greater-or-equal edge.
190*0e209d39SAndroid Build Coastguard Worker         stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode);
191*0e209d39SAndroid Build Coastguard Worker         stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
192*0e209d39SAndroid Build Coastguard Worker         // Follow the less-than edge.
193*0e209d39SAndroid Build Coastguard Worker         length>>=1;
194*0e209d39SAndroid Build Coastguard Worker         pos=jumpByDelta(pos);
195*0e209d39SAndroid Build Coastguard Worker     }
196*0e209d39SAndroid Build Coastguard Worker     // List of key-value pairs where values are either final values or jump deltas.
197*0e209d39SAndroid Build Coastguard Worker     // Read the first (key, value) pair.
198*0e209d39SAndroid Build Coastguard Worker     char16_t trieUnit=*pos++;
199*0e209d39SAndroid Build Coastguard Worker     int32_t node=*pos++;
200*0e209d39SAndroid Build Coastguard Worker     UBool isFinal=(UBool)(node>>15);
201*0e209d39SAndroid Build Coastguard Worker     int32_t value=readValue(pos, node&=0x7fff);
202*0e209d39SAndroid Build Coastguard Worker     pos=skipValue(pos, node);
203*0e209d39SAndroid Build Coastguard Worker     stack_->addElement((int32_t)(pos-uchars_), errorCode);
204*0e209d39SAndroid Build Coastguard Worker     stack_->addElement(((length-1)<<16)|str_.length(), errorCode);
205*0e209d39SAndroid Build Coastguard Worker     str_.append(trieUnit);
206*0e209d39SAndroid Build Coastguard Worker     if(isFinal) {
207*0e209d39SAndroid Build Coastguard Worker         pos_=nullptr;
208*0e209d39SAndroid Build Coastguard Worker         value_=value;
209*0e209d39SAndroid Build Coastguard Worker         return nullptr;
210*0e209d39SAndroid Build Coastguard Worker     } else {
211*0e209d39SAndroid Build Coastguard Worker         return pos+value;
212*0e209d39SAndroid Build Coastguard Worker     }
213*0e209d39SAndroid Build Coastguard Worker }
214*0e209d39SAndroid Build Coastguard Worker 
215*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
216