1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
5*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 2010-2011, International Business Machines
6*0e209d39SAndroid Build Coastguard Worker * Corporation and others. All Rights Reserved.
7*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
8*0e209d39SAndroid Build Coastguard Worker * file name: ucharstrieiterator.h
9*0e209d39SAndroid Build Coastguard Worker * encoding: UTF-8
10*0e209d39SAndroid Build Coastguard Worker * tab size: 8 (not used)
11*0e209d39SAndroid Build Coastguard Worker * indentation:4
12*0e209d39SAndroid Build Coastguard Worker *
13*0e209d39SAndroid Build Coastguard Worker * created on: 2010nov15
14*0e209d39SAndroid Build Coastguard Worker * created by: Markus W. Scherer
15*0e209d39SAndroid Build Coastguard Worker */
16*0e209d39SAndroid Build Coastguard Worker
17*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
18*0e209d39SAndroid Build Coastguard Worker #include "unicode/ucharstrie.h"
19*0e209d39SAndroid Build Coastguard Worker #include "unicode/unistr.h"
20*0e209d39SAndroid Build Coastguard Worker #include "uvectr32.h"
21*0e209d39SAndroid Build Coastguard Worker
22*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
23*0e209d39SAndroid Build Coastguard Worker
Iterator(ConstChar16Ptr trieUChars,int32_t maxStringLength,UErrorCode & errorCode)24*0e209d39SAndroid Build Coastguard Worker UCharsTrie::Iterator::Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength,
25*0e209d39SAndroid Build Coastguard Worker UErrorCode &errorCode)
26*0e209d39SAndroid Build Coastguard Worker : uchars_(trieUChars),
27*0e209d39SAndroid Build Coastguard Worker pos_(uchars_), initialPos_(uchars_),
28*0e209d39SAndroid Build Coastguard Worker remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
29*0e209d39SAndroid Build Coastguard Worker skipValue_(false),
30*0e209d39SAndroid Build Coastguard Worker maxLength_(maxStringLength), value_(0), stack_(nullptr) {
31*0e209d39SAndroid Build Coastguard Worker if(U_FAILURE(errorCode)) {
32*0e209d39SAndroid Build Coastguard Worker return;
33*0e209d39SAndroid Build Coastguard Worker }
34*0e209d39SAndroid Build Coastguard Worker // stack_ is a pointer so that it's easy to turn ucharstrie.h into
35*0e209d39SAndroid Build Coastguard Worker // a public API header for which we would want it to depend only on
36*0e209d39SAndroid Build Coastguard Worker // other public headers.
37*0e209d39SAndroid Build Coastguard Worker // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway
38*0e209d39SAndroid Build Coastguard Worker // via the UnicodeString and UVector32 implementations, so this additional
39*0e209d39SAndroid Build Coastguard Worker // cost is minimal.
40*0e209d39SAndroid Build Coastguard Worker stack_=new UVector32(errorCode);
41*0e209d39SAndroid Build Coastguard Worker if(stack_==nullptr) {
42*0e209d39SAndroid Build Coastguard Worker errorCode=U_MEMORY_ALLOCATION_ERROR;
43*0e209d39SAndroid Build Coastguard Worker }
44*0e209d39SAndroid Build Coastguard Worker }
45*0e209d39SAndroid Build Coastguard Worker
Iterator(const UCharsTrie & trie,int32_t maxStringLength,UErrorCode & errorCode)46*0e209d39SAndroid Build Coastguard Worker UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength,
47*0e209d39SAndroid Build Coastguard Worker UErrorCode &errorCode)
48*0e209d39SAndroid Build Coastguard Worker : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_),
49*0e209d39SAndroid Build Coastguard Worker remainingMatchLength_(trie.remainingMatchLength_),
50*0e209d39SAndroid Build Coastguard Worker initialRemainingMatchLength_(trie.remainingMatchLength_),
51*0e209d39SAndroid Build Coastguard Worker skipValue_(false),
52*0e209d39SAndroid Build Coastguard Worker maxLength_(maxStringLength), value_(0), stack_(nullptr) {
53*0e209d39SAndroid Build Coastguard Worker if(U_FAILURE(errorCode)) {
54*0e209d39SAndroid Build Coastguard Worker return;
55*0e209d39SAndroid Build Coastguard Worker }
56*0e209d39SAndroid Build Coastguard Worker stack_=new UVector32(errorCode);
57*0e209d39SAndroid Build Coastguard Worker if(U_FAILURE(errorCode)) {
58*0e209d39SAndroid Build Coastguard Worker return;
59*0e209d39SAndroid Build Coastguard Worker }
60*0e209d39SAndroid Build Coastguard Worker if(stack_==nullptr) {
61*0e209d39SAndroid Build Coastguard Worker errorCode=U_MEMORY_ALLOCATION_ERROR;
62*0e209d39SAndroid Build Coastguard Worker return;
63*0e209d39SAndroid Build Coastguard Worker }
64*0e209d39SAndroid Build Coastguard Worker int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
65*0e209d39SAndroid Build Coastguard Worker if(length>=0) {
66*0e209d39SAndroid Build Coastguard Worker // Pending linear-match node, append remaining UChars to str_.
67*0e209d39SAndroid Build Coastguard Worker ++length;
68*0e209d39SAndroid Build Coastguard Worker if(maxLength_>0 && length>maxLength_) {
69*0e209d39SAndroid Build Coastguard Worker length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
70*0e209d39SAndroid Build Coastguard Worker }
71*0e209d39SAndroid Build Coastguard Worker str_.append(pos_, length);
72*0e209d39SAndroid Build Coastguard Worker pos_+=length;
73*0e209d39SAndroid Build Coastguard Worker remainingMatchLength_-=length;
74*0e209d39SAndroid Build Coastguard Worker }
75*0e209d39SAndroid Build Coastguard Worker }
76*0e209d39SAndroid Build Coastguard Worker
~Iterator()77*0e209d39SAndroid Build Coastguard Worker UCharsTrie::Iterator::~Iterator() {
78*0e209d39SAndroid Build Coastguard Worker delete stack_;
79*0e209d39SAndroid Build Coastguard Worker }
80*0e209d39SAndroid Build Coastguard Worker
81*0e209d39SAndroid Build Coastguard Worker UCharsTrie::Iterator &
reset()82*0e209d39SAndroid Build Coastguard Worker UCharsTrie::Iterator::reset() {
83*0e209d39SAndroid Build Coastguard Worker pos_=initialPos_;
84*0e209d39SAndroid Build Coastguard Worker remainingMatchLength_=initialRemainingMatchLength_;
85*0e209d39SAndroid Build Coastguard Worker skipValue_=false;
86*0e209d39SAndroid Build Coastguard Worker int32_t length=remainingMatchLength_+1; // Remaining match length.
87*0e209d39SAndroid Build Coastguard Worker if(maxLength_>0 && length>maxLength_) {
88*0e209d39SAndroid Build Coastguard Worker length=maxLength_;
89*0e209d39SAndroid Build Coastguard Worker }
90*0e209d39SAndroid Build Coastguard Worker str_.truncate(length);
91*0e209d39SAndroid Build Coastguard Worker pos_+=length;
92*0e209d39SAndroid Build Coastguard Worker remainingMatchLength_-=length;
93*0e209d39SAndroid Build Coastguard Worker stack_->setSize(0);
94*0e209d39SAndroid Build Coastguard Worker return *this;
95*0e209d39SAndroid Build Coastguard Worker }
96*0e209d39SAndroid Build Coastguard Worker
97*0e209d39SAndroid Build Coastguard Worker UBool
hasNext() const98*0e209d39SAndroid Build Coastguard Worker UCharsTrie::Iterator::hasNext() const { return pos_!=nullptr || !stack_->isEmpty(); }
99*0e209d39SAndroid Build Coastguard Worker
100*0e209d39SAndroid Build Coastguard Worker UBool
next(UErrorCode & errorCode)101*0e209d39SAndroid Build Coastguard Worker UCharsTrie::Iterator::next(UErrorCode &errorCode) {
102*0e209d39SAndroid Build Coastguard Worker if(U_FAILURE(errorCode)) {
103*0e209d39SAndroid Build Coastguard Worker return false;
104*0e209d39SAndroid Build Coastguard Worker }
105*0e209d39SAndroid Build Coastguard Worker const char16_t *pos=pos_;
106*0e209d39SAndroid Build Coastguard Worker if(pos==nullptr) {
107*0e209d39SAndroid Build Coastguard Worker if(stack_->isEmpty()) {
108*0e209d39SAndroid Build Coastguard Worker return false;
109*0e209d39SAndroid Build Coastguard Worker }
110*0e209d39SAndroid Build Coastguard Worker // Pop the state off the stack and continue with the next outbound edge of
111*0e209d39SAndroid Build Coastguard Worker // the branch node.
112*0e209d39SAndroid Build Coastguard Worker int32_t stackSize=stack_->size();
113*0e209d39SAndroid Build Coastguard Worker int32_t length=stack_->elementAti(stackSize-1);
114*0e209d39SAndroid Build Coastguard Worker pos=uchars_+stack_->elementAti(stackSize-2);
115*0e209d39SAndroid Build Coastguard Worker stack_->setSize(stackSize-2);
116*0e209d39SAndroid Build Coastguard Worker str_.truncate(length&0xffff);
117*0e209d39SAndroid Build Coastguard Worker length=(int32_t)((uint32_t)length>>16);
118*0e209d39SAndroid Build Coastguard Worker if(length>1) {
119*0e209d39SAndroid Build Coastguard Worker pos=branchNext(pos, length, errorCode);
120*0e209d39SAndroid Build Coastguard Worker if(pos==nullptr) {
121*0e209d39SAndroid Build Coastguard Worker return true; // Reached a final value.
122*0e209d39SAndroid Build Coastguard Worker }
123*0e209d39SAndroid Build Coastguard Worker } else {
124*0e209d39SAndroid Build Coastguard Worker str_.append(*pos++);
125*0e209d39SAndroid Build Coastguard Worker }
126*0e209d39SAndroid Build Coastguard Worker }
127*0e209d39SAndroid Build Coastguard Worker if(remainingMatchLength_>=0) {
128*0e209d39SAndroid Build Coastguard Worker // We only get here if we started in a pending linear-match node
129*0e209d39SAndroid Build Coastguard Worker // with more than maxLength remaining units.
130*0e209d39SAndroid Build Coastguard Worker return truncateAndStop();
131*0e209d39SAndroid Build Coastguard Worker }
132*0e209d39SAndroid Build Coastguard Worker for(;;) {
133*0e209d39SAndroid Build Coastguard Worker int32_t node=*pos++;
134*0e209d39SAndroid Build Coastguard Worker if(node>=kMinValueLead) {
135*0e209d39SAndroid Build Coastguard Worker if(skipValue_) {
136*0e209d39SAndroid Build Coastguard Worker pos=skipNodeValue(pos, node);
137*0e209d39SAndroid Build Coastguard Worker node&=kNodeTypeMask;
138*0e209d39SAndroid Build Coastguard Worker skipValue_=false;
139*0e209d39SAndroid Build Coastguard Worker } else {
140*0e209d39SAndroid Build Coastguard Worker // Deliver value for the string so far.
141*0e209d39SAndroid Build Coastguard Worker UBool isFinal=(UBool)(node>>15);
142*0e209d39SAndroid Build Coastguard Worker if(isFinal) {
143*0e209d39SAndroid Build Coastguard Worker value_=readValue(pos, node&0x7fff);
144*0e209d39SAndroid Build Coastguard Worker } else {
145*0e209d39SAndroid Build Coastguard Worker value_=readNodeValue(pos, node);
146*0e209d39SAndroid Build Coastguard Worker }
147*0e209d39SAndroid Build Coastguard Worker if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
148*0e209d39SAndroid Build Coastguard Worker pos_=nullptr;
149*0e209d39SAndroid Build Coastguard Worker } else {
150*0e209d39SAndroid Build Coastguard Worker // We cannot skip the value right here because it shares its
151*0e209d39SAndroid Build Coastguard Worker // lead unit with a match node which we have to evaluate
152*0e209d39SAndroid Build Coastguard Worker // next time.
153*0e209d39SAndroid Build Coastguard Worker // Instead, keep pos_ on the node lead unit itself.
154*0e209d39SAndroid Build Coastguard Worker pos_=pos-1;
155*0e209d39SAndroid Build Coastguard Worker skipValue_=true;
156*0e209d39SAndroid Build Coastguard Worker }
157*0e209d39SAndroid Build Coastguard Worker return true;
158*0e209d39SAndroid Build Coastguard Worker }
159*0e209d39SAndroid Build Coastguard Worker }
160*0e209d39SAndroid Build Coastguard Worker if(maxLength_>0 && str_.length()==maxLength_) {
161*0e209d39SAndroid Build Coastguard Worker return truncateAndStop();
162*0e209d39SAndroid Build Coastguard Worker }
163*0e209d39SAndroid Build Coastguard Worker if(node<kMinLinearMatch) {
164*0e209d39SAndroid Build Coastguard Worker if(node==0) {
165*0e209d39SAndroid Build Coastguard Worker node=*pos++;
166*0e209d39SAndroid Build Coastguard Worker }
167*0e209d39SAndroid Build Coastguard Worker pos=branchNext(pos, node+1, errorCode);
168*0e209d39SAndroid Build Coastguard Worker if(pos==nullptr) {
169*0e209d39SAndroid Build Coastguard Worker return true; // Reached a final value.
170*0e209d39SAndroid Build Coastguard Worker }
171*0e209d39SAndroid Build Coastguard Worker } else {
172*0e209d39SAndroid Build Coastguard Worker // Linear-match node, append length units to str_.
173*0e209d39SAndroid Build Coastguard Worker int32_t length=node-kMinLinearMatch+1;
174*0e209d39SAndroid Build Coastguard Worker if(maxLength_>0 && str_.length()+length>maxLength_) {
175*0e209d39SAndroid Build Coastguard Worker str_.append(pos, maxLength_-str_.length());
176*0e209d39SAndroid Build Coastguard Worker return truncateAndStop();
177*0e209d39SAndroid Build Coastguard Worker }
178*0e209d39SAndroid Build Coastguard Worker str_.append(pos, length);
179*0e209d39SAndroid Build Coastguard Worker pos+=length;
180*0e209d39SAndroid Build Coastguard Worker }
181*0e209d39SAndroid Build Coastguard Worker }
182*0e209d39SAndroid Build Coastguard Worker }
183*0e209d39SAndroid Build Coastguard Worker
184*0e209d39SAndroid Build Coastguard Worker // Branch node, needs to take the first outbound edge and push state for the rest.
185*0e209d39SAndroid Build Coastguard Worker const char16_t *
branchNext(const char16_t * pos,int32_t length,UErrorCode & errorCode)186*0e209d39SAndroid Build Coastguard Worker UCharsTrie::Iterator::branchNext(const char16_t *pos, int32_t length, UErrorCode &errorCode) {
187*0e209d39SAndroid Build Coastguard Worker while(length>kMaxBranchLinearSubNodeLength) {
188*0e209d39SAndroid Build Coastguard Worker ++pos; // ignore the comparison unit
189*0e209d39SAndroid Build Coastguard Worker // Push state for the greater-or-equal edge.
190*0e209d39SAndroid Build Coastguard Worker stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode);
191*0e209d39SAndroid Build Coastguard Worker stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
192*0e209d39SAndroid Build Coastguard Worker // Follow the less-than edge.
193*0e209d39SAndroid Build Coastguard Worker length>>=1;
194*0e209d39SAndroid Build Coastguard Worker pos=jumpByDelta(pos);
195*0e209d39SAndroid Build Coastguard Worker }
196*0e209d39SAndroid Build Coastguard Worker // List of key-value pairs where values are either final values or jump deltas.
197*0e209d39SAndroid Build Coastguard Worker // Read the first (key, value) pair.
198*0e209d39SAndroid Build Coastguard Worker char16_t trieUnit=*pos++;
199*0e209d39SAndroid Build Coastguard Worker int32_t node=*pos++;
200*0e209d39SAndroid Build Coastguard Worker UBool isFinal=(UBool)(node>>15);
201*0e209d39SAndroid Build Coastguard Worker int32_t value=readValue(pos, node&=0x7fff);
202*0e209d39SAndroid Build Coastguard Worker pos=skipValue(pos, node);
203*0e209d39SAndroid Build Coastguard Worker stack_->addElement((int32_t)(pos-uchars_), errorCode);
204*0e209d39SAndroid Build Coastguard Worker stack_->addElement(((length-1)<<16)|str_.length(), errorCode);
205*0e209d39SAndroid Build Coastguard Worker str_.append(trieUnit);
206*0e209d39SAndroid Build Coastguard Worker if(isFinal) {
207*0e209d39SAndroid Build Coastguard Worker pos_=nullptr;
208*0e209d39SAndroid Build Coastguard Worker value_=value;
209*0e209d39SAndroid Build Coastguard Worker return nullptr;
210*0e209d39SAndroid Build Coastguard Worker } else {
211*0e209d39SAndroid Build Coastguard Worker return pos+value;
212*0e209d39SAndroid Build Coastguard Worker }
213*0e209d39SAndroid Build Coastguard Worker }
214*0e209d39SAndroid Build Coastguard Worker
215*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
216