1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
5*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 2010-2012, International Business Machines
6*0e209d39SAndroid Build Coastguard Worker * Corporation and others. All Rights Reserved.
7*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
8*0e209d39SAndroid Build Coastguard Worker * file name: bytestrieiterator.cpp
9*0e209d39SAndroid Build Coastguard Worker * encoding: UTF-8
10*0e209d39SAndroid Build Coastguard Worker * tab size: 8 (not used)
11*0e209d39SAndroid Build Coastguard Worker * indentation:4
12*0e209d39SAndroid Build Coastguard Worker *
13*0e209d39SAndroid Build Coastguard Worker * created on: 2010nov03
14*0e209d39SAndroid Build Coastguard Worker * created by: Markus W. Scherer
15*0e209d39SAndroid Build Coastguard Worker */
16*0e209d39SAndroid Build Coastguard Worker
17*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
18*0e209d39SAndroid Build Coastguard Worker #include "unicode/bytestrie.h"
19*0e209d39SAndroid Build Coastguard Worker #include "unicode/stringpiece.h"
20*0e209d39SAndroid Build Coastguard Worker #include "charstr.h"
21*0e209d39SAndroid Build Coastguard Worker #include "uvectr32.h"
22*0e209d39SAndroid Build Coastguard Worker
23*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
24*0e209d39SAndroid Build Coastguard Worker
Iterator(const void * trieBytes,int32_t maxStringLength,UErrorCode & errorCode)25*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
26*0e209d39SAndroid Build Coastguard Worker UErrorCode &errorCode)
27*0e209d39SAndroid Build Coastguard Worker : bytes_(static_cast<const uint8_t *>(trieBytes)),
28*0e209d39SAndroid Build Coastguard Worker pos_(bytes_), initialPos_(bytes_),
29*0e209d39SAndroid Build Coastguard Worker remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
30*0e209d39SAndroid Build Coastguard Worker str_(nullptr), maxLength_(maxStringLength), value_(0), stack_(nullptr) {
31*0e209d39SAndroid Build Coastguard Worker if(U_FAILURE(errorCode)) {
32*0e209d39SAndroid Build Coastguard Worker return;
33*0e209d39SAndroid Build Coastguard Worker }
34*0e209d39SAndroid Build Coastguard Worker // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
35*0e209d39SAndroid Build Coastguard Worker // a public API header for which we would want it to depend only on
36*0e209d39SAndroid Build Coastguard Worker // other public headers.
37*0e209d39SAndroid Build Coastguard Worker // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
38*0e209d39SAndroid Build Coastguard Worker // via the CharString and UVector32 implementations, so this additional
39*0e209d39SAndroid Build Coastguard Worker // cost is minimal.
40*0e209d39SAndroid Build Coastguard Worker str_=new CharString();
41*0e209d39SAndroid Build Coastguard Worker stack_=new UVector32(errorCode);
42*0e209d39SAndroid Build Coastguard Worker if(U_SUCCESS(errorCode) && (str_==nullptr || stack_==nullptr)) {
43*0e209d39SAndroid Build Coastguard Worker errorCode=U_MEMORY_ALLOCATION_ERROR;
44*0e209d39SAndroid Build Coastguard Worker }
45*0e209d39SAndroid Build Coastguard Worker }
46*0e209d39SAndroid Build Coastguard Worker
Iterator(const BytesTrie & trie,int32_t maxStringLength,UErrorCode & errorCode)47*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
48*0e209d39SAndroid Build Coastguard Worker UErrorCode &errorCode)
49*0e209d39SAndroid Build Coastguard Worker : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
50*0e209d39SAndroid Build Coastguard Worker remainingMatchLength_(trie.remainingMatchLength_),
51*0e209d39SAndroid Build Coastguard Worker initialRemainingMatchLength_(trie.remainingMatchLength_),
52*0e209d39SAndroid Build Coastguard Worker str_(nullptr), maxLength_(maxStringLength), value_(0), stack_(nullptr) {
53*0e209d39SAndroid Build Coastguard Worker if(U_FAILURE(errorCode)) {
54*0e209d39SAndroid Build Coastguard Worker return;
55*0e209d39SAndroid Build Coastguard Worker }
56*0e209d39SAndroid Build Coastguard Worker str_=new CharString();
57*0e209d39SAndroid Build Coastguard Worker stack_=new UVector32(errorCode);
58*0e209d39SAndroid Build Coastguard Worker if(U_FAILURE(errorCode)) {
59*0e209d39SAndroid Build Coastguard Worker return;
60*0e209d39SAndroid Build Coastguard Worker }
61*0e209d39SAndroid Build Coastguard Worker if(str_==nullptr || stack_==nullptr) {
62*0e209d39SAndroid Build Coastguard Worker errorCode=U_MEMORY_ALLOCATION_ERROR;
63*0e209d39SAndroid Build Coastguard Worker return;
64*0e209d39SAndroid Build Coastguard Worker }
65*0e209d39SAndroid Build Coastguard Worker int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
66*0e209d39SAndroid Build Coastguard Worker if(length>=0) {
67*0e209d39SAndroid Build Coastguard Worker // Pending linear-match node, append remaining bytes to str_.
68*0e209d39SAndroid Build Coastguard Worker ++length;
69*0e209d39SAndroid Build Coastguard Worker if(maxLength_>0 && length>maxLength_) {
70*0e209d39SAndroid Build Coastguard Worker length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
71*0e209d39SAndroid Build Coastguard Worker }
72*0e209d39SAndroid Build Coastguard Worker str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);
73*0e209d39SAndroid Build Coastguard Worker pos_+=length;
74*0e209d39SAndroid Build Coastguard Worker remainingMatchLength_-=length;
75*0e209d39SAndroid Build Coastguard Worker }
76*0e209d39SAndroid Build Coastguard Worker }
77*0e209d39SAndroid Build Coastguard Worker
~Iterator()78*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator::~Iterator() {
79*0e209d39SAndroid Build Coastguard Worker delete str_;
80*0e209d39SAndroid Build Coastguard Worker delete stack_;
81*0e209d39SAndroid Build Coastguard Worker }
82*0e209d39SAndroid Build Coastguard Worker
83*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator &
reset()84*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator::reset() {
85*0e209d39SAndroid Build Coastguard Worker pos_=initialPos_;
86*0e209d39SAndroid Build Coastguard Worker remainingMatchLength_=initialRemainingMatchLength_;
87*0e209d39SAndroid Build Coastguard Worker int32_t length=remainingMatchLength_+1; // Remaining match length.
88*0e209d39SAndroid Build Coastguard Worker if(maxLength_>0 && length>maxLength_) {
89*0e209d39SAndroid Build Coastguard Worker length=maxLength_;
90*0e209d39SAndroid Build Coastguard Worker }
91*0e209d39SAndroid Build Coastguard Worker str_->truncate(length);
92*0e209d39SAndroid Build Coastguard Worker pos_+=length;
93*0e209d39SAndroid Build Coastguard Worker remainingMatchLength_-=length;
94*0e209d39SAndroid Build Coastguard Worker stack_->setSize(0);
95*0e209d39SAndroid Build Coastguard Worker return *this;
96*0e209d39SAndroid Build Coastguard Worker }
97*0e209d39SAndroid Build Coastguard Worker
98*0e209d39SAndroid Build Coastguard Worker UBool
hasNext() const99*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator::hasNext() const { return pos_!=nullptr || !stack_->isEmpty(); }
100*0e209d39SAndroid Build Coastguard Worker
101*0e209d39SAndroid Build Coastguard Worker UBool
next(UErrorCode & errorCode)102*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator::next(UErrorCode &errorCode) {
103*0e209d39SAndroid Build Coastguard Worker if(U_FAILURE(errorCode)) {
104*0e209d39SAndroid Build Coastguard Worker return false;
105*0e209d39SAndroid Build Coastguard Worker }
106*0e209d39SAndroid Build Coastguard Worker const uint8_t *pos=pos_;
107*0e209d39SAndroid Build Coastguard Worker if(pos==nullptr) {
108*0e209d39SAndroid Build Coastguard Worker if(stack_->isEmpty()) {
109*0e209d39SAndroid Build Coastguard Worker return false;
110*0e209d39SAndroid Build Coastguard Worker }
111*0e209d39SAndroid Build Coastguard Worker // Pop the state off the stack and continue with the next outbound edge of
112*0e209d39SAndroid Build Coastguard Worker // the branch node.
113*0e209d39SAndroid Build Coastguard Worker int32_t stackSize=stack_->size();
114*0e209d39SAndroid Build Coastguard Worker int32_t length=stack_->elementAti(stackSize-1);
115*0e209d39SAndroid Build Coastguard Worker pos=bytes_+stack_->elementAti(stackSize-2);
116*0e209d39SAndroid Build Coastguard Worker stack_->setSize(stackSize-2);
117*0e209d39SAndroid Build Coastguard Worker str_->truncate(length&0xffff);
118*0e209d39SAndroid Build Coastguard Worker length=(int32_t)((uint32_t)length>>16);
119*0e209d39SAndroid Build Coastguard Worker if(length>1) {
120*0e209d39SAndroid Build Coastguard Worker pos=branchNext(pos, length, errorCode);
121*0e209d39SAndroid Build Coastguard Worker if(pos==nullptr) {
122*0e209d39SAndroid Build Coastguard Worker return true; // Reached a final value.
123*0e209d39SAndroid Build Coastguard Worker }
124*0e209d39SAndroid Build Coastguard Worker } else {
125*0e209d39SAndroid Build Coastguard Worker str_->append((char)*pos++, errorCode);
126*0e209d39SAndroid Build Coastguard Worker }
127*0e209d39SAndroid Build Coastguard Worker }
128*0e209d39SAndroid Build Coastguard Worker if(remainingMatchLength_>=0) {
129*0e209d39SAndroid Build Coastguard Worker // We only get here if we started in a pending linear-match node
130*0e209d39SAndroid Build Coastguard Worker // with more than maxLength remaining bytes.
131*0e209d39SAndroid Build Coastguard Worker return truncateAndStop();
132*0e209d39SAndroid Build Coastguard Worker }
133*0e209d39SAndroid Build Coastguard Worker for(;;) {
134*0e209d39SAndroid Build Coastguard Worker int32_t node=*pos++;
135*0e209d39SAndroid Build Coastguard Worker if(node>=kMinValueLead) {
136*0e209d39SAndroid Build Coastguard Worker // Deliver value for the byte sequence so far.
137*0e209d39SAndroid Build Coastguard Worker UBool isFinal=(UBool)(node&kValueIsFinal);
138*0e209d39SAndroid Build Coastguard Worker value_=readValue(pos, node>>1);
139*0e209d39SAndroid Build Coastguard Worker if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
140*0e209d39SAndroid Build Coastguard Worker pos_=nullptr;
141*0e209d39SAndroid Build Coastguard Worker } else {
142*0e209d39SAndroid Build Coastguard Worker pos_=skipValue(pos, node);
143*0e209d39SAndroid Build Coastguard Worker }
144*0e209d39SAndroid Build Coastguard Worker return true;
145*0e209d39SAndroid Build Coastguard Worker }
146*0e209d39SAndroid Build Coastguard Worker if(maxLength_>0 && str_->length()==maxLength_) {
147*0e209d39SAndroid Build Coastguard Worker return truncateAndStop();
148*0e209d39SAndroid Build Coastguard Worker }
149*0e209d39SAndroid Build Coastguard Worker if(node<kMinLinearMatch) {
150*0e209d39SAndroid Build Coastguard Worker if(node==0) {
151*0e209d39SAndroid Build Coastguard Worker node=*pos++;
152*0e209d39SAndroid Build Coastguard Worker }
153*0e209d39SAndroid Build Coastguard Worker pos=branchNext(pos, node+1, errorCode);
154*0e209d39SAndroid Build Coastguard Worker if(pos==nullptr) {
155*0e209d39SAndroid Build Coastguard Worker return true; // Reached a final value.
156*0e209d39SAndroid Build Coastguard Worker }
157*0e209d39SAndroid Build Coastguard Worker } else {
158*0e209d39SAndroid Build Coastguard Worker // Linear-match node, append length bytes to str_.
159*0e209d39SAndroid Build Coastguard Worker int32_t length=node-kMinLinearMatch+1;
160*0e209d39SAndroid Build Coastguard Worker if(maxLength_>0 && str_->length()+length>maxLength_) {
161*0e209d39SAndroid Build Coastguard Worker str_->append(reinterpret_cast<const char *>(pos),
162*0e209d39SAndroid Build Coastguard Worker maxLength_-str_->length(), errorCode);
163*0e209d39SAndroid Build Coastguard Worker return truncateAndStop();
164*0e209d39SAndroid Build Coastguard Worker }
165*0e209d39SAndroid Build Coastguard Worker str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
166*0e209d39SAndroid Build Coastguard Worker pos+=length;
167*0e209d39SAndroid Build Coastguard Worker }
168*0e209d39SAndroid Build Coastguard Worker }
169*0e209d39SAndroid Build Coastguard Worker }
170*0e209d39SAndroid Build Coastguard Worker
171*0e209d39SAndroid Build Coastguard Worker StringPiece
getString() const172*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator::getString() const {
173*0e209d39SAndroid Build Coastguard Worker return str_ == nullptr ? StringPiece() : str_->toStringPiece();
174*0e209d39SAndroid Build Coastguard Worker }
175*0e209d39SAndroid Build Coastguard Worker
176*0e209d39SAndroid Build Coastguard Worker UBool
truncateAndStop()177*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator::truncateAndStop() {
178*0e209d39SAndroid Build Coastguard Worker pos_=nullptr;
179*0e209d39SAndroid Build Coastguard Worker value_=-1; // no real value for str
180*0e209d39SAndroid Build Coastguard Worker return true;
181*0e209d39SAndroid Build Coastguard Worker }
182*0e209d39SAndroid Build Coastguard Worker
183*0e209d39SAndroid Build Coastguard Worker // Branch node, needs to take the first outbound edge and push state for the rest.
184*0e209d39SAndroid Build Coastguard Worker const uint8_t *
branchNext(const uint8_t * pos,int32_t length,UErrorCode & errorCode)185*0e209d39SAndroid Build Coastguard Worker BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
186*0e209d39SAndroid Build Coastguard Worker while(length>kMaxBranchLinearSubNodeLength) {
187*0e209d39SAndroid Build Coastguard Worker ++pos; // ignore the comparison byte
188*0e209d39SAndroid Build Coastguard Worker // Push state for the greater-or-equal edge.
189*0e209d39SAndroid Build Coastguard Worker stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
190*0e209d39SAndroid Build Coastguard Worker stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
191*0e209d39SAndroid Build Coastguard Worker // Follow the less-than edge.
192*0e209d39SAndroid Build Coastguard Worker length>>=1;
193*0e209d39SAndroid Build Coastguard Worker pos=jumpByDelta(pos);
194*0e209d39SAndroid Build Coastguard Worker }
195*0e209d39SAndroid Build Coastguard Worker // List of key-value pairs where values are either final values or jump deltas.
196*0e209d39SAndroid Build Coastguard Worker // Read the first (key, value) pair.
197*0e209d39SAndroid Build Coastguard Worker uint8_t trieByte=*pos++;
198*0e209d39SAndroid Build Coastguard Worker int32_t node=*pos++;
199*0e209d39SAndroid Build Coastguard Worker UBool isFinal=(UBool)(node&kValueIsFinal);
200*0e209d39SAndroid Build Coastguard Worker int32_t value=readValue(pos, node>>1);
201*0e209d39SAndroid Build Coastguard Worker pos=skipValue(pos, node);
202*0e209d39SAndroid Build Coastguard Worker stack_->addElement((int32_t)(pos-bytes_), errorCode);
203*0e209d39SAndroid Build Coastguard Worker stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
204*0e209d39SAndroid Build Coastguard Worker str_->append((char)trieByte, errorCode);
205*0e209d39SAndroid Build Coastguard Worker if(isFinal) {
206*0e209d39SAndroid Build Coastguard Worker pos_=nullptr;
207*0e209d39SAndroid Build Coastguard Worker value_=value;
208*0e209d39SAndroid Build Coastguard Worker return nullptr;
209*0e209d39SAndroid Build Coastguard Worker } else {
210*0e209d39SAndroid Build Coastguard Worker return pos+value;
211*0e209d39SAndroid Build Coastguard Worker }
212*0e209d39SAndroid Build Coastguard Worker }
213*0e209d39SAndroid Build Coastguard Worker
214*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
215