xref: /aosp_15_r20/external/icu/icu4c/source/common/bytestrieiterator.cpp (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
5*0e209d39SAndroid Build Coastguard Worker *   Copyright (C) 2010-2012, International Business Machines
6*0e209d39SAndroid Build Coastguard Worker *   Corporation and others.  All Rights Reserved.
7*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
8*0e209d39SAndroid Build Coastguard Worker *   file name:  bytestrieiterator.cpp
9*0e209d39SAndroid Build Coastguard Worker *   encoding:   UTF-8
10*0e209d39SAndroid Build Coastguard Worker *   tab size:   8 (not used)
11*0e209d39SAndroid Build Coastguard Worker *   indentation:4
12*0e209d39SAndroid Build Coastguard Worker *
13*0e209d39SAndroid Build Coastguard Worker *   created on: 2010nov03
14*0e209d39SAndroid Build Coastguard Worker *   created by: Markus W. Scherer
15*0e209d39SAndroid Build Coastguard Worker */
16*0e209d39SAndroid Build Coastguard Worker 
17*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
18*0e209d39SAndroid Build Coastguard Worker #include "unicode/bytestrie.h"
19*0e209d39SAndroid Build Coastguard Worker #include "unicode/stringpiece.h"
20*0e209d39SAndroid Build Coastguard Worker #include "charstr.h"
21*0e209d39SAndroid Build Coastguard Worker #include "uvectr32.h"
22*0e209d39SAndroid Build Coastguard Worker 
23*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
24*0e209d39SAndroid Build Coastguard Worker 
Iterator(const void * trieBytes,int32_t maxStringLength,UErrorCode & errorCode)25*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
26*0e209d39SAndroid Build Coastguard Worker                               UErrorCode &errorCode)
27*0e209d39SAndroid Build Coastguard Worker         : bytes_(static_cast<const uint8_t *>(trieBytes)),
28*0e209d39SAndroid Build Coastguard Worker           pos_(bytes_), initialPos_(bytes_),
29*0e209d39SAndroid Build Coastguard Worker           remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
30*0e209d39SAndroid Build Coastguard Worker           str_(nullptr), maxLength_(maxStringLength), value_(0), stack_(nullptr) {
31*0e209d39SAndroid Build Coastguard Worker     if(U_FAILURE(errorCode)) {
32*0e209d39SAndroid Build Coastguard Worker         return;
33*0e209d39SAndroid Build Coastguard Worker     }
34*0e209d39SAndroid Build Coastguard Worker     // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
35*0e209d39SAndroid Build Coastguard Worker     // a public API header for which we would want it to depend only on
36*0e209d39SAndroid Build Coastguard Worker     // other public headers.
37*0e209d39SAndroid Build Coastguard Worker     // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
38*0e209d39SAndroid Build Coastguard Worker     // via the CharString and UVector32 implementations, so this additional
39*0e209d39SAndroid Build Coastguard Worker     // cost is minimal.
40*0e209d39SAndroid Build Coastguard Worker     str_=new CharString();
41*0e209d39SAndroid Build Coastguard Worker     stack_=new UVector32(errorCode);
42*0e209d39SAndroid Build Coastguard Worker     if(U_SUCCESS(errorCode) && (str_==nullptr || stack_==nullptr)) {
43*0e209d39SAndroid Build Coastguard Worker         errorCode=U_MEMORY_ALLOCATION_ERROR;
44*0e209d39SAndroid Build Coastguard Worker     }
45*0e209d39SAndroid Build Coastguard Worker }
46*0e209d39SAndroid Build Coastguard Worker 
Iterator(const BytesTrie & trie,int32_t maxStringLength,UErrorCode & errorCode)47*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
48*0e209d39SAndroid Build Coastguard Worker                               UErrorCode &errorCode)
49*0e209d39SAndroid Build Coastguard Worker         : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
50*0e209d39SAndroid Build Coastguard Worker           remainingMatchLength_(trie.remainingMatchLength_),
51*0e209d39SAndroid Build Coastguard Worker           initialRemainingMatchLength_(trie.remainingMatchLength_),
52*0e209d39SAndroid Build Coastguard Worker           str_(nullptr), maxLength_(maxStringLength), value_(0), stack_(nullptr) {
53*0e209d39SAndroid Build Coastguard Worker     if(U_FAILURE(errorCode)) {
54*0e209d39SAndroid Build Coastguard Worker         return;
55*0e209d39SAndroid Build Coastguard Worker     }
56*0e209d39SAndroid Build Coastguard Worker     str_=new CharString();
57*0e209d39SAndroid Build Coastguard Worker     stack_=new UVector32(errorCode);
58*0e209d39SAndroid Build Coastguard Worker     if(U_FAILURE(errorCode)) {
59*0e209d39SAndroid Build Coastguard Worker         return;
60*0e209d39SAndroid Build Coastguard Worker     }
61*0e209d39SAndroid Build Coastguard Worker     if(str_==nullptr || stack_==nullptr) {
62*0e209d39SAndroid Build Coastguard Worker         errorCode=U_MEMORY_ALLOCATION_ERROR;
63*0e209d39SAndroid Build Coastguard Worker         return;
64*0e209d39SAndroid Build Coastguard Worker     }
65*0e209d39SAndroid Build Coastguard Worker     int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
66*0e209d39SAndroid Build Coastguard Worker     if(length>=0) {
67*0e209d39SAndroid Build Coastguard Worker         // Pending linear-match node, append remaining bytes to str_.
68*0e209d39SAndroid Build Coastguard Worker         ++length;
69*0e209d39SAndroid Build Coastguard Worker         if(maxLength_>0 && length>maxLength_) {
70*0e209d39SAndroid Build Coastguard Worker             length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
71*0e209d39SAndroid Build Coastguard Worker         }
72*0e209d39SAndroid Build Coastguard Worker         str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);
73*0e209d39SAndroid Build Coastguard Worker         pos_+=length;
74*0e209d39SAndroid Build Coastguard Worker         remainingMatchLength_-=length;
75*0e209d39SAndroid Build Coastguard Worker     }
76*0e209d39SAndroid Build Coastguard Worker }
77*0e209d39SAndroid Build Coastguard Worker 
~Iterator()78*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator::~Iterator() {
79*0e209d39SAndroid Build Coastguard Worker     delete str_;
80*0e209d39SAndroid Build Coastguard Worker     delete stack_;
81*0e209d39SAndroid Build Coastguard Worker }
82*0e209d39SAndroid Build Coastguard Worker 
83*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator &
reset()84*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator::reset() {
85*0e209d39SAndroid Build Coastguard Worker     pos_=initialPos_;
86*0e209d39SAndroid Build Coastguard Worker     remainingMatchLength_=initialRemainingMatchLength_;
87*0e209d39SAndroid Build Coastguard Worker     int32_t length=remainingMatchLength_+1;  // Remaining match length.
88*0e209d39SAndroid Build Coastguard Worker     if(maxLength_>0 && length>maxLength_) {
89*0e209d39SAndroid Build Coastguard Worker         length=maxLength_;
90*0e209d39SAndroid Build Coastguard Worker     }
91*0e209d39SAndroid Build Coastguard Worker     str_->truncate(length);
92*0e209d39SAndroid Build Coastguard Worker     pos_+=length;
93*0e209d39SAndroid Build Coastguard Worker     remainingMatchLength_-=length;
94*0e209d39SAndroid Build Coastguard Worker     stack_->setSize(0);
95*0e209d39SAndroid Build Coastguard Worker     return *this;
96*0e209d39SAndroid Build Coastguard Worker }
97*0e209d39SAndroid Build Coastguard Worker 
98*0e209d39SAndroid Build Coastguard Worker UBool
hasNext() const99*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator::hasNext() const { return pos_!=nullptr || !stack_->isEmpty(); }
100*0e209d39SAndroid Build Coastguard Worker 
101*0e209d39SAndroid Build Coastguard Worker UBool
next(UErrorCode & errorCode)102*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator::next(UErrorCode &errorCode) {
103*0e209d39SAndroid Build Coastguard Worker     if(U_FAILURE(errorCode)) {
104*0e209d39SAndroid Build Coastguard Worker         return false;
105*0e209d39SAndroid Build Coastguard Worker     }
106*0e209d39SAndroid Build Coastguard Worker     const uint8_t *pos=pos_;
107*0e209d39SAndroid Build Coastguard Worker     if(pos==nullptr) {
108*0e209d39SAndroid Build Coastguard Worker         if(stack_->isEmpty()) {
109*0e209d39SAndroid Build Coastguard Worker             return false;
110*0e209d39SAndroid Build Coastguard Worker         }
111*0e209d39SAndroid Build Coastguard Worker         // Pop the state off the stack and continue with the next outbound edge of
112*0e209d39SAndroid Build Coastguard Worker         // the branch node.
113*0e209d39SAndroid Build Coastguard Worker         int32_t stackSize=stack_->size();
114*0e209d39SAndroid Build Coastguard Worker         int32_t length=stack_->elementAti(stackSize-1);
115*0e209d39SAndroid Build Coastguard Worker         pos=bytes_+stack_->elementAti(stackSize-2);
116*0e209d39SAndroid Build Coastguard Worker         stack_->setSize(stackSize-2);
117*0e209d39SAndroid Build Coastguard Worker         str_->truncate(length&0xffff);
118*0e209d39SAndroid Build Coastguard Worker         length=(int32_t)((uint32_t)length>>16);
119*0e209d39SAndroid Build Coastguard Worker         if(length>1) {
120*0e209d39SAndroid Build Coastguard Worker             pos=branchNext(pos, length, errorCode);
121*0e209d39SAndroid Build Coastguard Worker             if(pos==nullptr) {
122*0e209d39SAndroid Build Coastguard Worker                 return true;  // Reached a final value.
123*0e209d39SAndroid Build Coastguard Worker             }
124*0e209d39SAndroid Build Coastguard Worker         } else {
125*0e209d39SAndroid Build Coastguard Worker             str_->append((char)*pos++, errorCode);
126*0e209d39SAndroid Build Coastguard Worker         }
127*0e209d39SAndroid Build Coastguard Worker     }
128*0e209d39SAndroid Build Coastguard Worker     if(remainingMatchLength_>=0) {
129*0e209d39SAndroid Build Coastguard Worker         // We only get here if we started in a pending linear-match node
130*0e209d39SAndroid Build Coastguard Worker         // with more than maxLength remaining bytes.
131*0e209d39SAndroid Build Coastguard Worker         return truncateAndStop();
132*0e209d39SAndroid Build Coastguard Worker     }
133*0e209d39SAndroid Build Coastguard Worker     for(;;) {
134*0e209d39SAndroid Build Coastguard Worker         int32_t node=*pos++;
135*0e209d39SAndroid Build Coastguard Worker         if(node>=kMinValueLead) {
136*0e209d39SAndroid Build Coastguard Worker             // Deliver value for the byte sequence so far.
137*0e209d39SAndroid Build Coastguard Worker             UBool isFinal=(UBool)(node&kValueIsFinal);
138*0e209d39SAndroid Build Coastguard Worker             value_=readValue(pos, node>>1);
139*0e209d39SAndroid Build Coastguard Worker             if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
140*0e209d39SAndroid Build Coastguard Worker                 pos_=nullptr;
141*0e209d39SAndroid Build Coastguard Worker             } else {
142*0e209d39SAndroid Build Coastguard Worker                 pos_=skipValue(pos, node);
143*0e209d39SAndroid Build Coastguard Worker             }
144*0e209d39SAndroid Build Coastguard Worker             return true;
145*0e209d39SAndroid Build Coastguard Worker         }
146*0e209d39SAndroid Build Coastguard Worker         if(maxLength_>0 && str_->length()==maxLength_) {
147*0e209d39SAndroid Build Coastguard Worker             return truncateAndStop();
148*0e209d39SAndroid Build Coastguard Worker         }
149*0e209d39SAndroid Build Coastguard Worker         if(node<kMinLinearMatch) {
150*0e209d39SAndroid Build Coastguard Worker             if(node==0) {
151*0e209d39SAndroid Build Coastguard Worker                 node=*pos++;
152*0e209d39SAndroid Build Coastguard Worker             }
153*0e209d39SAndroid Build Coastguard Worker             pos=branchNext(pos, node+1, errorCode);
154*0e209d39SAndroid Build Coastguard Worker             if(pos==nullptr) {
155*0e209d39SAndroid Build Coastguard Worker                 return true;  // Reached a final value.
156*0e209d39SAndroid Build Coastguard Worker             }
157*0e209d39SAndroid Build Coastguard Worker         } else {
158*0e209d39SAndroid Build Coastguard Worker             // Linear-match node, append length bytes to str_.
159*0e209d39SAndroid Build Coastguard Worker             int32_t length=node-kMinLinearMatch+1;
160*0e209d39SAndroid Build Coastguard Worker             if(maxLength_>0 && str_->length()+length>maxLength_) {
161*0e209d39SAndroid Build Coastguard Worker                 str_->append(reinterpret_cast<const char *>(pos),
162*0e209d39SAndroid Build Coastguard Worker                             maxLength_-str_->length(), errorCode);
163*0e209d39SAndroid Build Coastguard Worker                 return truncateAndStop();
164*0e209d39SAndroid Build Coastguard Worker             }
165*0e209d39SAndroid Build Coastguard Worker             str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
166*0e209d39SAndroid Build Coastguard Worker             pos+=length;
167*0e209d39SAndroid Build Coastguard Worker         }
168*0e209d39SAndroid Build Coastguard Worker     }
169*0e209d39SAndroid Build Coastguard Worker }
170*0e209d39SAndroid Build Coastguard Worker 
171*0e209d39SAndroid Build Coastguard Worker StringPiece
getString() const172*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator::getString() const {
173*0e209d39SAndroid Build Coastguard Worker     return str_ == nullptr ? StringPiece() : str_->toStringPiece();
174*0e209d39SAndroid Build Coastguard Worker }
175*0e209d39SAndroid Build Coastguard Worker 
176*0e209d39SAndroid Build Coastguard Worker UBool
truncateAndStop()177*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator::truncateAndStop() {
178*0e209d39SAndroid Build Coastguard Worker     pos_=nullptr;
179*0e209d39SAndroid Build Coastguard Worker     value_=-1;  // no real value for str
180*0e209d39SAndroid Build Coastguard Worker     return true;
181*0e209d39SAndroid Build Coastguard Worker }
182*0e209d39SAndroid Build Coastguard Worker 
183*0e209d39SAndroid Build Coastguard Worker // Branch node, needs to take the first outbound edge and push state for the rest.
184*0e209d39SAndroid Build Coastguard Worker const uint8_t *
branchNext(const uint8_t * pos,int32_t length,UErrorCode & errorCode)185*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
186*0e209d39SAndroid Build Coastguard Worker     while(length>kMaxBranchLinearSubNodeLength) {
187*0e209d39SAndroid Build Coastguard Worker         ++pos;  // ignore the comparison byte
188*0e209d39SAndroid Build Coastguard Worker         // Push state for the greater-or-equal edge.
189*0e209d39SAndroid Build Coastguard Worker         stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
190*0e209d39SAndroid Build Coastguard Worker         stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
191*0e209d39SAndroid Build Coastguard Worker         // Follow the less-than edge.
192*0e209d39SAndroid Build Coastguard Worker         length>>=1;
193*0e209d39SAndroid Build Coastguard Worker         pos=jumpByDelta(pos);
194*0e209d39SAndroid Build Coastguard Worker     }
195*0e209d39SAndroid Build Coastguard Worker     // List of key-value pairs where values are either final values or jump deltas.
196*0e209d39SAndroid Build Coastguard Worker     // Read the first (key, value) pair.
197*0e209d39SAndroid Build Coastguard Worker     uint8_t trieByte=*pos++;
198*0e209d39SAndroid Build Coastguard Worker     int32_t node=*pos++;
199*0e209d39SAndroid Build Coastguard Worker     UBool isFinal=(UBool)(node&kValueIsFinal);
200*0e209d39SAndroid Build Coastguard Worker     int32_t value=readValue(pos, node>>1);
201*0e209d39SAndroid Build Coastguard Worker     pos=skipValue(pos, node);
202*0e209d39SAndroid Build Coastguard Worker     stack_->addElement((int32_t)(pos-bytes_), errorCode);
203*0e209d39SAndroid Build Coastguard Worker     stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
204*0e209d39SAndroid Build Coastguard Worker     str_->append((char)trieByte, errorCode);
205*0e209d39SAndroid Build Coastguard Worker     if(isFinal) {
206*0e209d39SAndroid Build Coastguard Worker         pos_=nullptr;
207*0e209d39SAndroid Build Coastguard Worker         value_=value;
208*0e209d39SAndroid Build Coastguard Worker         return nullptr;
209*0e209d39SAndroid Build Coastguard Worker     } else {
210*0e209d39SAndroid Build Coastguard Worker         return pos+value;
211*0e209d39SAndroid Build Coastguard Worker     }
212*0e209d39SAndroid Build Coastguard Worker }
213*0e209d39SAndroid Build Coastguard Worker 
214*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
215